From 0eae3f72f2ac729b17c7c97874569ef06384c702 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 11 Feb 2026 11:48:16 +0100 Subject: [PATCH 01/37] fix: correct frame data extraction --- .../object_detection/ObjectDetection.cpp | 123 ++++++++++++++++-- 1 file changed, 111 insertions(+), 12 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 8b5bc022f..f17a4f074 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -2,14 +2,17 @@ #include #include +#include #include +#include +#include namespace rnexecutorch::models::object_detection { ObjectDetection::ObjectDetection( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -29,14 +32,49 @@ ObjectDetection::ObjectDetection( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const { + // Get target size from model input shape + const std::vector tensorDims = getAllInputShapes()[0]; + cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], + tensorDims[tensorDims.size() - 2]); + + cv::Mat rgb; + + // Convert RGBA/BGRA to RGB if needed (for VisionCamera frames) + if (frame.channels() == 4) { +// Platform-specific color conversion: +// iOS uses BGRA format, Android uses RGBA format +#ifdef __APPLE__ + // iOS: BGRA β†’ RGB + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + // Android: RGBA β†’ RGB + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if 
(frame.channels() == 3) { + // Already RGB + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + // Only resize if dimensions don't match + if (rgb.size() != tensorSize) { + cv::Mat resized; + cv::resize(rgb, resized, tensorSize); + return resized; + } + + return rgb; +} + std::vector ObjectDetection::postprocess(const std::vector &tensors, cv::Size originalSize, double detectionThreshold) { - if (detectionThreshold <= 0 || detectionThreshold > 1) { - throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig, - "Detection threshold must be greater than 0 " - "and less than or equal to 1."); - } float widthRatio = static_cast(originalSize.width) / modelImageSize.width; float heightRatio = @@ -70,14 +108,23 @@ ObjectDetection::postprocess(const std::vector &tensors, scores[i]); } - std::vector output = utils::nonMaxSuppression(detections); - return output; + return utils::nonMaxSuppression(detections); } std::vector -ObjectDetection::generate(std::string imageSource, double detectionThreshold) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { + std::lock_guard lock(inference_mutex_); + + // Store original size for postprocessing + cv::Size originalSize = image.size(); + + // Preprocess the image using model-specific preprocessing + cv::Mat preprocessed = preprocessFrame(image); + + // Create tensor and run inference + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -88,4 +135,56 @@ ObjectDetection::generate(std::string imageSource, double 
detectionThreshold) { return postprocess(forwardResult.get(), originalSize, detectionThreshold); } -} // namespace rnexecutorch::models::object_detection + +std::vector +ObjectDetection::generateFromString(std::string imageSource, + double detectionThreshold) { + // Read image using OpenCV (BGR format) + cv::Mat image = image_processing::readImage(imageSource); + + // Convert BGR to RGB (OpenCV imread returns BGR) + cv::Mat imageRGB; + cv::cvtColor(image, imageRGB, cv::COLOR_BGR2RGB); + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(imageRGB, detectionThreshold); +} + +std::vector +ObjectDetection::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData, + double detectionThreshold) { + // Try-lock: skip frame if model is busy (non-blocking for camera) + if (!inference_mutex_.try_lock()) { + return {}; // Return empty vector, don't block camera thread + } + + // Extract frame (under lock to ensure thread safety) + cv::Mat frame; + { + std::lock_guard lock(inference_mutex_, std::adopt_lock); + auto frameObj = frameData.asObject(runtime); + frame = + rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); + } + // Lock is automatically released here when going out of scope + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(frame, detectionThreshold); +} + +std::vector +ObjectDetection::generateFromPixels(jsi::Runtime &runtime, + const jsi::Value &pixelData, + double detectionThreshold) { + // Convert JSI value to JSTensorViewIn + auto tensorView = + jsi_conversion::getValue(pixelData, runtime); + + // Extract raw pixel data to cv::Mat + cv::Mat image = extractFromPixels(tensorView); + + // Use the internal helper - it handles locking, preprocessing, and inference + return runInference(image, detectionThreshold); +} +} // namespace rnexecutorch::models::object_detection \ No newline at end of file From 
65667ad331dd55635bb97c345b27b97eff8b0789 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 12 Feb 2026 14:24:02 +0100 Subject: [PATCH 02/37] feat: frame extractor for zero-copy approach --- .../rnexecutorch/utils/FrameExtractor.cpp | 151 ++++++++++++++++++ .../rnexecutorch/utils/FrameExtractor.h | 60 +++++++ .../src/types/common.ts | 33 ++++ yarn.lock | 11 +- 4 files changed, 245 insertions(+), 10 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp new file mode 100644 index 000000000..f64855131 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -0,0 +1,151 @@ +#include "FrameExtractor.h" +#include + +#ifdef __APPLE__ +#import +#endif + +#ifdef __ANDROID__ +#if __ANDROID_API__ >= 26 +#include +#endif +#endif + +namespace rnexecutorch { +namespace utils { + +cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { +#ifdef __APPLE__ + return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); +#elif defined(__ANDROID__) + return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); +#else + throw std::runtime_error("NativeBuffer not supported on this platform"); +#endif +} + +#ifdef __APPLE__ +cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { + CVPixelBufferRef buffer = static_cast(pixelBuffer); + + // Get buffer properties + size_t width = CVPixelBufferGetWidth(buffer); + size_t height = CVPixelBufferGetHeight(buffer); + size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); + OSType pixelFormat = CVPixelBufferGetPixelFormatType(buffer); + + // Lock the buffer (Vision Camera should have already locked it, but ensure) + 
CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + void *baseAddress = CVPixelBufferGetBaseAddress(buffer); + + cv::Mat mat; + + // Log pixel format once for debugging + static bool loggedPixelFormat = false; + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "CVPixelBuffer format code: ", pixelFormat); + loggedPixelFormat = true; + } + + if (pixelFormat == kCVPixelFormatType_32BGRA) { + // BGRA format (most common on iOS when using pixelFormat: 'rgb') + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: BGRA format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_32RGBA) { + // RGBA format + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGBA format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, + baseAddress, bytesPerRow); + } else if (pixelFormat == kCVPixelFormatType_24RGB) { + // RGB format + if (!loggedPixelFormat) { + log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGB format, ", + width, "x", height, ", stride: ", bytesPerRow); + } + mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, + baseAddress, bytesPerRow); + } else { + CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); + throw std::runtime_error("Unsupported CVPixelBuffer format: " + + std::to_string(pixelFormat)); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + // When frame.dispose() is called, Vision Camera will unlock and release + + return mat; +} +#endif + +#ifdef __ANDROID__ +cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { +#if __ANDROID_API__ >= 26 + AHardwareBuffer *buffer = static_cast(hardwareBuffer); + + // Get buffer description + AHardwareBuffer_Desc desc; + 
AHardwareBuffer_describe(buffer, &desc); + + // Lock the buffer for CPU read access + void *data = nullptr; + int lockResult = AHardwareBuffer_lock( + buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); + + if (lockResult != 0) { + throw std::runtime_error("Failed to lock AHardwareBuffer"); + } + + cv::Mat mat; + + // Log format once for debugging + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "AHardwareBuffer format code: ", desc.format); + loggedFormat = true; + } + + if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { + // RGBA format (expected when using pixelFormat: 'rgb' on Android) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBA format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 4); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { + // RGBX format (treated as RGBA) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBX format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 4); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); + } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { + // RGB format (less common) + if (!loggedFormat) { + log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGB format, ", + desc.width, "x", desc.height, ", stride: ", desc.stride * 3); + } + mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); + } else { + AHardwareBuffer_unlock(buffer, nullptr); + throw std::runtime_error("Unsupported AHardwareBuffer format: " + + std::to_string(desc.format)); + } + + // Note: We don't unlock here - Vision Camera manages the lifecycle + + return mat; +#else + throw std::runtime_error("AHardwareBuffer requires Android API 26+"); +#endif // __ANDROID_API__ >= 26 +} +#endif // __ANDROID__ + +} // namespace utils +} // namespace 
rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h new file mode 100644 index 000000000..a90e6ad23 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + +namespace rnexecutorch { +namespace utils { + +/** + * @brief Utility class for extracting cv::Mat from native platform buffers + * + * Provides zero-copy extraction of frames from: + * - iOS: CVPixelBufferRef + * - Android: AHardwareBuffer + */ +class FrameExtractor { +public: + /** + * @brief Extract cv::Mat from a native buffer pointer + * + * @param bufferPtr Platform-specific buffer pointer (uint64_t) + * - iOS: CVPixelBufferRef + * - Android: AHardwareBuffer* + * @return cv::Mat wrapping the buffer data (zero-copy) + * + * @note The returned cv::Mat does not own the data. + * The caller must ensure the buffer remains valid. + * @note The buffer must be locked before calling and unlocked after use. 
+ */ + static cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); + +#ifdef __APPLE__ + /** + * @brief Extract cv::Mat from CVPixelBuffer (iOS) + * + * @param pixelBuffer CVPixelBufferRef pointer + * @return cv::Mat wrapping the pixel buffer data + * + * @note Assumes buffer is already locked by Vision Camera + * @note Supports kCVPixelFormatType_32BGRA and kCVPixelFormatType_24RGB + */ + static cv::Mat extractFromCVPixelBuffer(void *pixelBuffer); +#endif + +#ifdef __ANDROID__ + /** + * @brief Extract cv::Mat from AHardwareBuffer (Android) + * + * @param hardwareBuffer AHardwareBuffer* pointer + * @return cv::Mat wrapping the hardware buffer data + * + * @note Assumes buffer is already locked by Vision Camera + * @note Supports AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM and R8G8B8_UNORM + */ + static cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer); +#endif +}; + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 384caa861..439e18597 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -151,3 +151,36 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; + +/** + * Frame data for vision model processing. + * Supports two modes: + * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms + * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 + */ +export interface FrameData { + /** + * Raw pixel data as ArrayBuffer (requires memory copy). + * Use this for compatibility or when getNativeBuffer is not available. + */ + data?: ArrayBuffer | ArrayBufferLike; + + /** + * Pointer to native platform buffer (zero-copy, best performance). 
+ * - On iOS: CVPixelBufferRef pointer + * - On Android: AHardwareBuffer* pointer + * + * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + */ + nativeBuffer?: bigint; + + /** + * Frame width in pixels + */ + width: number; + + /** + * Frame height in pixels + */ + height: number; +} diff --git a/yarn.lock b/yarn.lock index 436005c8d..90ac56b11 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13799,16 +13799,7 @@ __metadata: languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": - version: 7.7.3 - resolution: "semver@npm:7.7.3" - bin: - semver: bin/semver.js - checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 - languageName: node - linkType: hard - -"semver@npm:^7.7.3": +"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1, semver@npm:^7.7.3": version: 7.7.4 resolution: "semver@npm:7.7.4" bin: From daed38a3bb216c6b4fa40354f029650c13265969 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Fri, 13 Feb 2026 11:29:23 +0100 Subject: [PATCH 03/37] chore: num minSdkVersion to 26 --- packages/react-native-executorch/android/gradle.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/react-native-executorch/android/gradle.properties b/packages/react-native-executorch/android/gradle.properties index b30a8b11d..97cdd1854 100644 --- a/packages/react-native-executorch/android/gradle.properties +++ b/packages/react-native-executorch/android/gradle.properties @@ -1,5 +1,5 @@ RnExecutorch_kotlinVersion=1.7.0 -RnExecutorch_minSdkVersion=21 +RnExecutorch_minSdkVersion=26 RnExecutorch_targetSdkVersion=31 RnExecutorch_compileSdkVersion=31 -RnExecutorch_ndkversion=21.4.7075529 +RnExecutorch_ndkversion=21.4.7075529 
\ No newline at end of file From 3d534dea4875ac316bcf0d93a13ef924ec13d34d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 10:37:11 +0100 Subject: [PATCH 04/37] feat: unify frame extraction and preprocessing --- .../rnexecutorch/models/VisionModel.cpp | 20 +++ .../common/rnexecutorch/models/VisionModel.h | 139 +++++++++++++++++ .../models/classification/Classification.cpp | 2 +- .../rnexecutorch/utils/FrameProcessor.cpp | 142 ++++++++++++++++++ .../rnexecutorch/utils/FrameProcessor.h | 109 ++++++++++++++ .../src/modules/BaseModule.ts | 79 +++++++++- yarn.lock | 11 +- 7 files changed, 498 insertions(+), 4 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp create mode 100644 packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp new file mode 100644 index 000000000..671ed03c8 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -0,0 +1,20 @@ +#include "VisionModel.h" +#include + +namespace rnexecutorch { +namespace models { + +using namespace facebook; + +cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, + const jsi::Value &frameData) const { + // Extract frame using FrameProcessor utility + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); + + // Apply model-specific preprocessing + return preprocessFrame(frame); +} + +} // namespace models +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h 
b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h new file mode 100644 index 000000000..11da49547 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -0,0 +1,139 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace rnexecutorch { +namespace models { + +/** + * @brief Base class for computer vision models that support real-time camera + * input + * + * VisionModel extends BaseModel with thread-safe inference and automatic frame + * extraction from VisionCamera. This class is designed for models that need to + * process camera frames in real-time (e.g., at 30fps). + * + * Thread Safety: + * - All inference operations are protected by a mutex + * - generateFromFrame() uses try_lock() to skip frames when the model is busy + * - This prevents blocking the camera thread and maintains smooth frame rates + * + * Usage: + * Subclasses should: + * 1. Inherit from VisionModel instead of BaseModel + * 2. Implement preprocessFrame() with model-specific preprocessing + * 3. Use inference_mutex_ when calling forward() in custom generate methods + * 4. Use lock_guard for blocking operations (JS API) + * 5. Use try_lock() for non-blocking operations (camera API) + * + * Example: + * @code + * class Classification : public VisionModel { + * public: + * std::unordered_map + * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { + * // try_lock is handled automatically + * auto frameObject = frameValue.asObject(runtime); + * cv::Mat frame = FrameExtractor::extractFrame(runtime, frameObject); + * + * // Lock before inference + * if (!inference_mutex_.try_lock()) { + * return {}; // Skip frame if busy + * } + * std::lock_guard lock(inference_mutex_, std::adopt_lock); + * + * auto preprocessed = preprocessFrame(frame); + * // ... 
run inference + * } + * }; + * @endcode + */ +class VisionModel : public BaseModel { +public: + /** + * @brief Construct a VisionModel with the same parameters as BaseModel + * + * VisionModel uses the same construction pattern as BaseModel, just adding + * thread-safety on top. + */ + VisionModel(const std::string &modelSource, + std::shared_ptr callInvoker) + : BaseModel(modelSource, callInvoker) {} + + /** + * @brief Virtual destructor for proper cleanup in derived classes + */ + virtual ~VisionModel() = default; + +protected: + /** + * @brief Mutex to ensure thread-safe inference + * + * This mutex protects against race conditions when: + * - generateFromFrame() is called from VisionCamera worklet thread (30fps) + * - generate() is called from JavaScript thread simultaneously + * + * Usage guidelines: + * - Use std::lock_guard for blocking operations (JS API can wait) + * - Use try_lock() for non-blocking operations (camera should skip frames) + * + * @note Marked mutable to allow locking in const methods if needed + */ + mutable std::mutex inference_mutex_; + + /** + * @brief Preprocess a camera frame for model input + * + * This method should implement model-specific preprocessing such as: + * - Resizing to the model's expected input size + * - Color space conversion (e.g., BGR to RGB) + * - Normalization + * - Any other model-specific transformations + * + * @param frame Input frame from camera (already extracted and rotated by + * FrameExtractor) + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @note The input frame is already in RGB format and rotated 90Β° clockwise + * @note This method is called under mutex protection in generateFromFrame() + */ + virtual cv::Mat preprocessFrame(const cv::Mat &frame) const = 0; + + /** + * @brief Extract and preprocess frame from VisionCamera in one call + * + * This is a convenience method that combines frame extraction and + * preprocessing. 
It handles both nativeBuffer (zero-copy) and ArrayBuffer + * paths automatically. + * + * @param runtime JSI runtime + * @param frameData JSI value containing frame data from VisionCamera + * + * @return Preprocessed cv::Mat ready for tensor conversion + * + * @throws std::runtime_error if frame extraction fails + * + * @note This method does NOT acquire the inference mutex - caller is + * responsible + * @note Typical usage: + * @code + * cv::Mat preprocessed = extractAndPreprocess(runtime, frameData); + * auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed); + * @endcode + */ + cv::Mat extractAndPreprocess(jsi::Runtime &runtime, + const jsi::Value &frameData) const; +}; + +} // namespace models +// Register VisionModel constructor traits +// Even though VisionModel is abstract, the metaprogramming system needs to know +// its constructor signature for derived classes +REGISTER_CONSTRUCTOR(models::VisionModel, std::string, + std::shared_ptr); + +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..b9fad1b88 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification +} // namespace rnexecutorch::models::classification \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp new file mode 100644 index 000000000..02faa072d --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -0,0 +1,142 @@ 
+#include "FrameProcessor.h" +#include "FrameExtractor.h" +#include +#include + +namespace rnexecutorch { +namespace utils { + +cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, + const jsi::Object &frameData) { + // Get frame dimensions + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); + + // Try zero-copy path first (nativeBuffer) + if (hasNativeBuffer(runtime, frameData)) { + static bool loggedPath = false; + if (!loggedPath) { + log(LOG_LEVEL::Debug, "FrameProcessor: Using zero-copy nativeBuffer"); + loggedPath = true; + } + + try { + return extractFromNativeBuffer(runtime, frameData, width, height); + } catch (const std::exception &e) { + log(LOG_LEVEL::Debug, + "FrameProcessor: nativeBuffer extraction failed: ", e.what()); + log(LOG_LEVEL::Debug, "FrameProcessor: Falling back to ArrayBuffer"); + } + } + + // Fallback to ArrayBuffer path (with copy) + if (frameData.hasProperty(runtime, "data")) { + static bool loggedPath = false; + if (!loggedPath) { + log(LOG_LEVEL::Debug, "FrameProcessor: Using ArrayBuffer (with copy)"); + loggedPath = true; + } + + return extractFromArrayBuffer(runtime, frameData, width, height); + } + + // No valid frame data source + throw std::runtime_error( + "FrameProcessor: No valid frame data (neither nativeBuffer nor data " + "property found)"); +} + +cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, + const jsi::Object &frameData) { + if (!frameData.hasProperty(runtime, "width") || + !frameData.hasProperty(runtime, "height")) { + throw std::runtime_error("FrameProcessor: Frame data missing width or " + "height property"); + } + + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); + + return cv::Size(width, height); +} + +bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, + const 
jsi::Object &frameData) { + return frameData.hasProperty(runtime, "nativeBuffer"); +} + +cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height) { + auto nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); + + // Handle bigint pointer value from JavaScript + uint64_t bufferPtr = static_cast( + nativeBufferValue.asBigInt(runtime).asUint64(runtime)); + + // Use FrameExtractor to get cv::Mat from platform-specific buffer + cv::Mat frame = FrameExtractor::extractFromNativeBuffer(bufferPtr); + + // Validate extracted frame dimensions match expected + if (frame.cols != width || frame.rows != height) { + log(LOG_LEVEL::Debug, "FrameProcessor: Dimension mismatch - expected ", + width, "x", height, " but got ", frame.cols, "x", frame.rows); + } + + return frame; +} + +cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height) { + auto pixelData = frameData.getProperty(runtime, "data"); + auto arrayBuffer = pixelData.asObject(runtime).getArrayBuffer(runtime); + uint8_t *data = arrayBuffer.data(runtime); + size_t bufferSize = arrayBuffer.size(runtime); + + // Determine format based on buffer size + size_t stride = bufferSize / height; + size_t expectedRGBAStride = width * 4; + size_t expectedRGBStride = width * 3; + + cv::Mat frame; + + if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { + // RGBA format with potential padding + frame = cv::Mat(height, width, CV_8UC4, data, stride); + + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, + "FrameProcessor: ArrayBuffer format is RGBA, " + "stride: ", + stride); + loggedFormat = true; + } + } else if (stride >= expectedRGBStride) { + // RGB format + frame = cv::Mat(height, width, CV_8UC3, data, stride); + + static bool loggedFormat = false; + if (!loggedFormat) { + log(LOG_LEVEL::Debug, + "FrameProcessor: ArrayBuffer format is 
RGB, stride: ", stride); + loggedFormat = true; + } + } else { + throw std::runtime_error( + "FrameProcessor: Unexpected buffer size - expected " + + std::to_string(expectedRGBStride) + " or " + + std::to_string(expectedRGBAStride) + " bytes per row, got " + + std::to_string(stride)); + } + + return frame; +} + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h new file mode 100644 index 000000000..e37b5bfd6 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include + +namespace rnexecutorch { +namespace utils { + +using namespace facebook; + +/** + * @brief Utility class for processing camera frames from VisionCamera + * + * Provides high-level helpers for extracting and working with frames from + * react-native-vision-camera in a consistent way across all vision models. + * + * This class abstracts away the complexity of: + * - Handling both nativeBuffer (zero-copy) and ArrayBuffer (with copy) paths + * - Platform-specific buffer formats (CVPixelBuffer on iOS, AHardwareBuffer + * on Android) + * - JSI object property access and type conversions + * + * Usage: + * @code + * auto frameObj = frameData.asObject(runtime); + * cv::Mat frame = FrameProcessor::extractFrame(runtime, frameObj); + * cv::Size size = FrameProcessor::getFrameSize(runtime, frameObj); + * @endcode + */ +class FrameProcessor { +public: + /** + * @brief Extract cv::Mat from VisionCamera frame data + * + * Handles both zero-copy (nativeBuffer) and copy-based (ArrayBuffer) paths + * automatically. Prefers nativeBuffer when available for best performance. 
+ * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data from VisionCamera + * Expected properties: + * - nativeBuffer (optional): BigInt pointer to native buffer + * - data (optional): ArrayBuffer with pixel data + * - width: number + * - height: number + * + * @return cv::Mat wrapping or containing the frame data + * + * @throws std::runtime_error if neither nativeBuffer nor data is available + * @throws std::runtime_error if nativeBuffer extraction fails + * + * @note The returned cv::Mat may not own the data (zero-copy path). + * Caller must ensure the source frame remains valid during use. + */ + static cv::Mat extractFrame(jsi::Runtime &runtime, + const jsi::Object &frameData); + + /** + * @brief Get frame dimensions from VisionCamera frame data + * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data + * + * @return cv::Size with frame width and height + * + * @throws std::runtime_error if width or height properties are missing + */ + static cv::Size getFrameSize(jsi::Runtime &runtime, + const jsi::Object &frameData); + + /** + * @brief Check if frame data has nativeBuffer (zero-copy path available) + * + * @param runtime JSI runtime + * @param frameData JSI object containing frame data + * @return true if nativeBuffer is available, false otherwise + */ + static bool hasNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData); + +private: + /** + * @brief Extract frame from nativeBuffer pointer (zero-copy) + * + * @param runtime JSI runtime + * @param frameData JSI object with nativeBuffer property + * @param width Frame width + * @param height Frame height + * @return cv::Mat wrapping the native buffer data + */ + static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, + int width, int height); + + /** + * @brief Extract frame from ArrayBuffer (with copy) + * + * @param runtime JSI runtime + * @param frameData JSI object with data property + * 
@param width Frame width + * @param height Frame height + * @return cv::Mat containing or wrapping the array buffer data + */ + static cv::Mat extractFromArrayBuffer(jsi::Runtime &runtime, + const jsi::Object &frameData, int width, + int height); +}; + +} // namespace utils +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 6aefc8b2a..315b82249 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -1,12 +1,68 @@ import { ResourceSource } from '../types/common'; import { TensorPtr } from '../types/common'; +/** + * Base class for all React Native Executorch modules. + * + * Provides core functionality for loading models, running inference, + * and managing native resources. + * + * @category Base Classes + */ export abstract class BaseModule { /** - * Native module instance + * Native module instance (JSI Host Object) + * @internal */ nativeModule: any = null; + /** + * Process a camera frame directly for real-time inference. + * + * This method is bound to a native JSI function after calling `load()`, + * making it worklet-compatible and safe to call from VisionCamera's + * frame processor thread. + * + * **Performance characteristics:** + * - **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + * frame data is accessed directly without copying (fastest, recommended). + * - **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + * from native to JS, then accessed from native code (slower, fallback). 
+ * + * **Usage with VisionCamera:** + * ```typescript + * const frameOutput = useFrameOutput({ + * pixelFormat: 'rgb', + * onFrame(frame) { + * 'worklet'; + * // Zero-copy approach (recommended) + * const nativeBuffer = frame.getNativeBuffer(); + * const result = model.generateFromFrame( + * { nativeBuffer: nativeBuffer.pointer, width: frame.width, height: frame.height }, + * ...args + * ); + * nativeBuffer.release(); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frameData Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + * @param args Additional model-specific arguments (e.g., threshold, options) + * @returns Model-specific output (e.g., detections, classifications, embeddings) + * + * @see {@link FrameData} for frame data format details + */ + public generateFromFrame!: (frameData: FrameData, ...args: any[]) => any; + + /** + * Load the model and prepare it for inference. + * + * @param modelSource - Resource location of the model binary + * @param onDownloadProgressCallback - Optional callback to monitor download progress (0-1) + * @param args - Additional model-specific loading arguments + */ + abstract load( modelSource: ResourceSource, onDownloadProgressCallback: (_: number) => void, @@ -19,6 +75,7 @@ export abstract class BaseModule { * * @param inputTensor - Array of input tensors. * @returns Array of output tensors. + * @internal */ protected async forwardET(inputTensor: TensorPtr[]): Promise { return await this.nativeModule.forward(inputTensor); @@ -36,11 +93,29 @@ export abstract class BaseModule { } /** - * Unloads the model from memory. + * Unloads the model from memory and releases native resources. + * + * Always call this method when you're done with a model to prevent memory leaks. */ delete() { if (this.nativeModule !== null) { this.nativeModule.unload(); } } + + /** + * Bind JSI methods to this instance for worklet compatibility. 
+ * + * This makes native JSI functions accessible from worklet threads, + * which is essential for VisionCamera frame processing. + * + * @internal + */ + protected bindJSIMethods() { + if (this.nativeModule && this.nativeModule.generateFromFrame) { + // Bind the native JSI method directly to this instance + // This makes it worklet-compatible since JSI functions work across threads + this.generateFromFrame = this.nativeModule.generateFromFrame; + } + } } diff --git a/yarn.lock b/yarn.lock index 90ac56b11..436005c8d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -13799,7 +13799,16 @@ __metadata: languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1, semver@npm:^7.7.3": +"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": + version: 7.7.3 + resolution: "semver@npm:7.7.3" + bin: + semver: bin/semver.js + checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 + languageName: node + linkType: hard + +"semver@npm:^7.7.3": version: 7.7.4 resolution: "semver@npm:7.7.4" bin: From 9ce35daccb16772b2803d2d817d193966bde85a5 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 11:22:16 +0100 Subject: [PATCH 05/37] feat: remove unused bindJSIMethods --- .../src/modules/BaseModule.ts | 16 ---------------- .../computer_vision/ObjectDetectionModule.ts | 1 - 2 files changed, 17 deletions(-) diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 315b82249..0870a30b6 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -102,20 +102,4 @@ export abstract class BaseModule { 
this.nativeModule.unload(); } } - - /** - * Bind JSI methods to this instance for worklet compatibility. - * - * This makes native JSI functions accessible from worklet threads, - * which is essential for VisionCamera frame processing. - * - * @internal - */ - protected bindJSIMethods() { - if (this.nativeModule && this.nativeModule.generateFromFrame) { - // Bind the native JSI method directly to this instance - // This makes it worklet-compatible since JSI functions work across threads - this.generateFromFrame = this.nativeModule.generateFromFrame; - } - } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 95b9e436b..78dfed4f6 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -28,7 +28,6 @@ export class ObjectDetectionModule extends BaseModule { onDownloadProgressCallback, model.modelSource ); - if (!paths?.[0]) { throw new RnExecutorchError( RnExecutorchErrorCode.DownloadInterrupted, From 66af65c844d9f59301ac73b21dea3c9aafe21784 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 13:05:14 +0100 Subject: [PATCH 06/37] feat: initial version of vision model API --- .cspell-wordlist.txt | 1 + .../app/object_detection/index.tsx | 167 ++++++++++++++++- .../rnexecutorch/RnExecutorchInstaller.h | 12 +- .../host_objects/ModelHostObject.h | 21 ++- .../metaprogramming/TypeConcepts.h | 9 +- .../rnexecutorch/models/VisionModel.cpp | 47 ++++- .../common/rnexecutorch/models/VisionModel.h | 42 ++++- .../models/embeddings/image/ImageEmbeddings.h | 2 +- .../BaseImageSegmentation.h | 2 +- .../image_segmentation/ImageSegmentation.cpp | 170 ++++++++++++++++++ .../models/object_detection/ObjectDetection.h | 18 +- .../models/style_transfer/StyleTransfer.h | 2 +- 
.../tests/integration/ObjectDetectionTest.cpp | 25 +-- .../computer_vision/useObjectDetection.ts | 7 +- .../src/hooks/useModule.ts | 38 ++++ .../computer_vision/ObjectDetectionModule.ts | 24 +-- .../modules/computer_vision/VisionModule.ts | 154 ++++++++++++++++ .../src/types/objectDetection.ts | 73 +++++++- 18 files changed, 745 insertions(+), 69 deletions(-) create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp create mode 100644 packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 419872562..a2e8ecbab 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -116,3 +116,4 @@ antonov rfdetr basemodule IMAGENET +worklet \ No newline at end of file diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 6a43dd920..9e60589fb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,16 +1,66 @@ import Spinner from '../../components/Spinner'; -import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image } from 'react-native'; +import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import ColorPalette from '../../colors'; +import { Images } from 'react-native-nitro-image'; + +// Helper function to convert image URI to raw pixel data using NitroImage +async function imageUriToPixelData( + uri: string, + targetWidth: number, + targetHeight: number +): 
Promise<{ + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}> { + try { + // Load image and resize to target dimensions + const image = await Images.loadFromFileAsync(uri); + const resized = image.resize(targetWidth, targetHeight); + + // Get pixel data as ArrayBuffer (RGBA format) + const pixelData = resized.toRawPixelData(); + const buffer = + pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; + + // Calculate actual buffer dimensions (accounts for device pixel ratio) + const bufferSize = buffer?.byteLength || 0; + const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel + const aspectRatio = targetWidth / targetHeight; + const actualHeight = Math.sqrt(totalPixels / aspectRatio); + const actualWidth = totalPixels / actualHeight; + + console.log('Requested:', targetWidth, 'x', targetHeight); + console.log('Buffer size:', bufferSize); + console.log( + 'Actual dimensions:', + Math.round(actualWidth), + 'x', + Math.round(actualHeight) + ); + + return { + data: buffer, + width: Math.round(actualWidth), + height: Math.round(actualHeight), + channels: 4, // RGBA + }; + } catch (error) { + console.error('Error loading image with NitroImage:', error); + throw error; + } +} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -42,10 +92,41 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - const output = await ssdLite.forward(imageUri); + console.log('Running forward with string URI...'); + const output = await ssdLite.forward(imageUri, 0.5); + console.log('String URI result:', output.length, 'detections'); setResults(output); } catch (e) { - console.error(e); + console.error('Error in runForward:', e); + } + } + }; + + const runForwardPixels = async () => { + if (imageUri && imageDimensions) { + try { + console.log('Converting image to pixel data...'); + // Resize to 640x640 to avoid memory issues + const 
intermediateSize = 640; + const pixelData = await imageUriToPixelData( + imageUri, + intermediateSize, + intermediateSize + ); + + console.log('Running forward with pixel data...', { + width: pixelData.width, + height: pixelData.height, + channels: pixelData.channels, + dataSize: pixelData.data.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.5); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } } }; @@ -81,10 +162,41 @@ export default function ObjectDetectionScreen() { )} - + + {/* Custom bottom bar with two buttons */} + + + handleCameraPress(false)}> + πŸ“· Gallery + + + + + + Run (String) + + + + Run (Pixels) + + + ); } @@ -129,4 +241,43 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, + bottomContainer: { + width: '100%', + gap: 15, + alignItems: 'center', + padding: 16, + flex: 1, + }, + bottomIconsContainer: { + flexDirection: 'row', + justifyContent: 'center', + width: '100%', + }, + iconText: { + fontSize: 16, + color: ColorPalette.primary, + }, + buttonsRow: { + flexDirection: 'row', + width: '100%', + gap: 10, + }, + button: { + height: 50, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: ColorPalette.primary, + color: '#fff', + borderRadius: 8, + }, + halfButton: { + flex: 1, + }, + buttonDisabled: { + opacity: 0.5, + }, + buttonText: { + color: '#fff', + fontSize: 16, + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index d5c98763d..80b7d18b3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -54,8 +54,16 @@ class RnExecutorchInstaller { meta::createConstructorArgsWithCallInvoker( args, 
runtime, jsCallInvoker); - auto modelImplementationPtr = std::make_shared( - std::make_from_tuple(constructorArgs)); + // This unpacks the tuple and calls the constructor directly inside + // make_shared. It avoids creating a temporary object, so no + // move/copy is required. + auto modelImplementationPtr = std::apply( + [](auto &&...unpackedArgs) { + return std::make_shared( + std::forward(unpackedArgs)...); + }, + std::move(constructorArgs)); + auto modelHostObject = std::make_shared>( modelImplementationPtr, jsCallInvoker); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 7712b2b9d..9a2e6776e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -45,10 +45,11 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } - if constexpr (meta::HasGenerate) { - addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, - promiseHostFunction<&Model::generate>, - "generate")); + if constexpr (meta::HasGenerateFromString) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromString>, + "generateFromString")); } if constexpr (meta::HasEncode) { @@ -168,10 +169,22 @@ template class ModelHostObject : public JsiHostObject { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::stream>, "stream")); + } + + // Register generateFromFrame for all VisionModel subclasses + if constexpr (meta::DerivedFromOrSameAs) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, synchronousHostFunction<&Model::streamStop>, "streamStop")); } + + // Register generateFromPixels for models that support it + if constexpr (meta::HasGenerateFromPixels) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + 
visionHostFunction<&Model::generateFromPixels>, + "generateFromPixels")); + } } // A generic host function that runs synchronously, works analogously to the diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 85a3db449..8100a471b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -12,8 +12,13 @@ template concept SameAs = std::is_same_v; template -concept HasGenerate = requires(T t) { - { &T::generate }; +concept HasGenerateFromString = requires(T t) { + { &T::generateFromString }; +}; + +template +concept HasGenerateFromPixels = requires(T t) { + { &T::generateFromPixels }; }; template diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 671ed03c8..54c0adfd2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -6,8 +6,8 @@ namespace models { using namespace facebook; -cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, - const jsi::Value &frameData) const { +cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const { // Extract frame using FrameProcessor utility auto frameObj = frameData.asObject(runtime); cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); @@ -16,5 +16,48 @@ cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime, return preprocessFrame(frame); } +cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, + const jsi::Object &pixelData) const { + // Extract width, height, and channels + if (!pixelData.hasProperty(runtime, "width") || + 
!pixelData.hasProperty(runtime, "height") || + !pixelData.hasProperty(runtime, "channels") || + !pixelData.hasProperty(runtime, "data")) { + throw std::runtime_error( + "Invalid pixel data: must contain width, height, channels, and data"); + } + + int width = pixelData.getProperty(runtime, "width").asNumber(); + int height = pixelData.getProperty(runtime, "height").asNumber(); + int channels = pixelData.getProperty(runtime, "channels").asNumber(); + + // Get the ArrayBuffer + auto dataValue = pixelData.getProperty(runtime, "data"); + if (!dataValue.isObject() || + !dataValue.asObject(runtime).isArrayBuffer(runtime)) { + throw std::runtime_error( + "pixel data 'data' property must be an ArrayBuffer"); + } + + auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime); + size_t expectedSize = width * height * channels; + + if (arrayBuffer.size(runtime) != expectedSize) { + throw std::runtime_error( + "ArrayBuffer size does not match width * height * channels"); + } + + // Create cv::Mat and copy the data + // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let + // preprocessFrame handle conversion + int cvType = (channels == 3) ? 
CV_8UC3 : CV_8UC4; + cv::Mat image(height, width, cvType); + + // Copy data from ArrayBuffer to cv::Mat + std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize); + + return image; +} + } // namespace models } // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 11da49547..9ba5cf7e4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -121,12 +121,48 @@ class VisionModel : public BaseModel { * responsible * @note Typical usage: * @code - * cv::Mat preprocessed = extractAndPreprocess(runtime, frameData); + * cv::Mat preprocessed = extractFromFrame(runtime, frameData); * auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed); * @endcode */ - cv::Mat extractAndPreprocess(jsi::Runtime &runtime, - const jsi::Value &frameData) const; + cv::Mat extractFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) const; + + /** + * @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from + * JavaScript + * + * This method enables users to run inference on raw pixel data without file + * I/O. Useful for processing images already in memory (e.g., from canvas, + * image library). 
+ * + * @param runtime JSI runtime + * @param pixelData JSI object containing: + * - data: ArrayBuffer with raw pixel values + * - width: number - image width + * - height: number - image height + * - channels: number - number of channels (3 for RGB, 4 for + * RGBA) + * + * @return cv::Mat containing the pixel data + * + * @throws std::runtime_error if pixelData format is invalid + * + * @note The returned cv::Mat owns a copy of the data + * @note Expected pixel format: RGB or RGBA, row-major order + * @note Typical usage from JS: + * @code + * const pixels = new Uint8Array([...]); // Raw pixel data + * const result = model.generateFromPixels({ + * data: pixels.buffer, + * width: 640, + * height: 480, + * channels: 3 + * }, 0.5); + * @endcode + */ + cv::Mat extractFromPixels(jsi::Runtime &runtime, + const jsi::Object &pixelData) const; }; } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..9a1d6429b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..34ad8dffd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp new file mode 100644 index 000000000..08f2a4683 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp @@ -0,0 +1,170 @@ +#include "ImageSegmentation.h" + +#include + +#include +#include +#include +#include +#include +#include + +namespace rnexecutorch::models::image_segmentation { + +ImageSegmentation::ImageSegmentation( + const std::string &modelSource, + std::shared_ptr callInvoker) + : BaseModel(modelSource, callInvoker) { + auto inputShapes = getAllInputShapes(); + if (inputShapes.size() == 0) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Model seems to not take any input tensors."); + } + std::vector modelInputShape = inputShapes[0]; + if (modelInputShape.size() < 2) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unexpected model input size, expected at least 2 dimentions " + "but got: %zu.", + modelInputShape.size()); + throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, + errorMessage); + } + modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], + modelInputShape[modelInputShape.size() - 2]); + numModelPixels = modelImageSize.area(); +} + +std::shared_ptr ImageSegmentation::generate( + std::string imageSource, + std::set> classesOfInterest, bool resize) { + auto [inputTensor, originalSize] = + 
image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); + + auto forwardResult = BaseModel::forward(inputTensor); + if (!forwardResult.ok()) { + throw RnExecutorchError(forwardResult.error(), + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + } + + return postprocess(forwardResult->at(0).toTensor(), originalSize, + classesOfInterest, resize); +} + +std::shared_ptr ImageSegmentation::postprocess( + const Tensor &tensor, cv::Size originalSize, + std::set> classesOfInterest, bool resize) { + + auto dataPtr = static_cast(tensor.const_data_ptr()); + auto resultData = std::span(dataPtr, tensor.numel()); + + // We copy the ET-owned data to jsi array buffers that can be directly + // returned to JS + std::vector> resultClasses; + resultClasses.reserve(numClasses); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + auto classBuffer = std::make_shared( + &resultData[cl * numModelPixels], numModelPixels * sizeof(float)); + resultClasses.push_back(classBuffer); + } + + // Apply softmax per each pixel across all classes + for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { + std::vector classValues(numClasses); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + classValues[cl] = + reinterpret_cast(resultClasses[cl]->data())[pixel]; + } + numerical::softmax(classValues); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + reinterpret_cast(resultClasses[cl]->data())[pixel] = + classValues[cl]; + } + } + + // Calculate the maximum class for each pixel + auto argmax = + std::make_shared(numModelPixels * sizeof(int32_t)); + for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { + float max = reinterpret_cast(resultClasses[0]->data())[pixel]; + int maxInd = 0; + for (int cl = 1; cl < numClasses; ++cl) { + if (reinterpret_cast(resultClasses[cl]->data())[pixel] > max) { + maxInd = cl; + max = reinterpret_cast(resultClasses[cl]->data())[pixel]; + } + } + reinterpret_cast(argmax->data())[pixel] = 
maxInd; + } + + auto buffersToReturn = std::make_shared>>(); + for (std::size_t cl = 0; cl < numClasses; ++cl) { + if (classesOfInterest.contains(constants::kDeeplabV3Resnet50Labels[cl])) { + (*buffersToReturn)[constants::kDeeplabV3Resnet50Labels[cl]] = + resultClasses[cl]; + } + } + + // Resize selected classes and argmax + if (resize) { + cv::Mat argmaxMat(modelImageSize, CV_32SC1, argmax->data()); + cv::resize(argmaxMat, argmaxMat, originalSize, 0, 0, + cv::InterpolationFlags::INTER_NEAREST); + argmax = std::make_shared( + argmaxMat.data, originalSize.area() * sizeof(int32_t)); + + for (auto &[label, arrayBuffer] : *buffersToReturn) { + cv::Mat classMat(modelImageSize, CV_32FC1, arrayBuffer->data()); + cv::resize(classMat, classMat, originalSize); + arrayBuffer = std::make_shared( + classMat.data, originalSize.area() * sizeof(float)); + } + } + return populateDictionary(argmax, buffersToReturn); +} + +std::shared_ptr ImageSegmentation::populateDictionary( + std::shared_ptr argmax, + std::shared_ptr>> + classesToOutput) { + // Synchronize the invoked thread to return when the dict is constructed + auto promisePtr = std::make_shared>(); + std::future doneFuture = promisePtr->get_future(); + + std::shared_ptr dictPtr = nullptr; + callInvoker->invokeAsync( + [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { + dictPtr = std::make_shared(runtime); + auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); + + auto int32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Int32Array"); + auto int32Array = + int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) + .getObject(runtime); + dictPtr->setProperty(runtime, "ARGMAX", int32Array); + + for (auto &[classLabel, owningBuffer] : *classesToOutput) { + auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); + + auto float32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Float32Array"); + auto float32Array = + float32ArrayCtor.callAsConstructor(runtime, 
classArrayBuffer) + .getObject(runtime); + + dictPtr->setProperty( + runtime, jsi::String::createFromAscii(runtime, classLabel.data()), + float32Array); + } + promisePtr->set_value(); + }); + + doneFuture.wait(); + return dictPtr; +} + +} // namespace rnexecutorch::models::image_segmentation \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index bba09a6d8..fc554003b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -8,7 +8,7 @@ #include "Types.h" #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include #include namespace rnexecutorch { @@ -16,12 +16,24 @@ namespace models::object_detection { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ObjectDetection : public BaseModel { +class ObjectDetection : public VisionModel { public: ObjectDetection(const std::string &modelSource, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string imageSource, double detectionThreshold); + generateFromString(std::string imageSource, double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + double detectionThreshold); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, + double detectionThreshold); + +protected: + // Internal helper for shared preprocessing and inference logic + std::vector runInference(cv::Mat image, + double detectionThreshold); + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: std::vector postprocess(const 
std::vector &tensors, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..8eed3c888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index ae80208a6..074ee0751 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -29,7 +29,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath, 0.5); + (void)model.generateFromString(kValidTestImagePath, 0.5); } }; } // namespace model_tests @@ -43,49 +43,50 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ObjectDetection, CommonModelTest, // ============================================================================ TEST(ObjectDetectionGenerateTests, InvalidImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg", 0.5), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, EmptyImagePathThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("", 0.5), 
RnExecutorchError); + EXPECT_THROW((void)model.generateFromString("", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, MalformedURIThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad", 0.5), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad", 0.5), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, -0.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, -0.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - EXPECT_THROW((void)model.generate(kValidTestImagePath, 1.1), + EXPECT_THROW((void)model.generateFromString(kValidTestImagePath, 1.1), RnExecutorchError); } TEST(ObjectDetectionGenerateTests, ValidImageReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); EXPECT_GE(results.size(), 0u); } TEST(ObjectDetectionGenerateTests, HighThresholdReturnsFewerResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto lowThresholdResults = model.generate(kValidTestImagePath, 0.1); - auto highThresholdResults = model.generate(kValidTestImagePath, 0.9); + auto lowThresholdResults = model.generateFromString(kValidTestImagePath, 0.1); + auto highThresholdResults = + model.generateFromString(kValidTestImagePath, 0.9); EXPECT_GE(lowThresholdResults.size(), highThresholdResults.size()); } TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = 
model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_LE(detection.x1, detection.x2); @@ -97,7 +98,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -107,7 +108,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidScores) { TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - auto results = model.generate(kValidTestImagePath, 0.3); + auto results = model.generateFromString(kValidTestImagePath, 0.3); for (const auto &detection : results) { EXPECT_GE(detection.label, 0); diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts index 2d52eb706..845f1aa23 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts @@ -15,9 +15,10 @@ import { export const useObjectDetection = ({ model, preventLoad = false, -}: ObjectDetectionProps): ObjectDetectionType => - useModule({ +}: ObjectDetectionProps): ObjectDetectionType => { + return useModule({ module: ObjectDetectionModule, model, preventLoad: preventLoad, - }); + }) as ObjectDetectionType; +}; diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 1a35885d5..624094afb 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -6,6 +6,7 @@ interface Module { 
load: (...args: any[]) => Promise; forward: (...args: any[]) => Promise; delete: () => void; + nativeModule?: any; // JSI host object with native methods } interface ModuleConstructor { @@ -31,6 +32,7 @@ export const useModule = < const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); const [moduleInstance] = useState(() => new module()); + const [runOnFrame, setRunOnFrame] = useState(null); useEffect(() => { if (preventLoad) return; @@ -46,6 +48,15 @@ export const useModule = < if (isMounted) setDownloadProgress(progress); }); if (isMounted) setIsReady(true); + + // Extract runOnFrame worklet from VisionModule if available + // Use "state trick" to make the worklet serializable for VisionCamera + if ('runOnFrame' in moduleInstance) { + const worklet = moduleInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } + } } catch (err) { if (isMounted) setError(parseUnknownError(err)); } @@ -99,5 +110,32 @@ export const useModule = < */ downloadProgress, forward, + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * Only available for Computer Vision modules that support real-time frame processing + * (e.g., ObjectDetection, Classification, ImageSegmentation). + * Returns `null` if the module doesn't implement frame processing. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. 
+ * + * @example + * ```typescript + * const { runOnFrame } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + */ + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 78dfed4f6..0818d9682 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -3,15 +3,15 @@ import { ResourceSource } from '../../types/common'; import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for object detection tasks. * * @category Typescript API */ -export class ObjectDetectionModule extends BaseModule { +export class ObjectDetectionModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -41,24 +41,4 @@ export class ObjectDetectionModule extends BaseModule { throw parseUnknownError(error); } } - - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * `detectionThreshold` can be supplied to alter the sensitivity of the detection. - * - * @param imageSource - The image source to be processed. - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. 
- * @returns An array of Detection objects representing detected items in the image. - */ - async forward( - imageSource: string, - detectionThreshold: number = 0.7 - ): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - return await this.nativeModule.generate(imageSource, detectionThreshold); - } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts new file mode 100644 index 000000000..06acf6654 --- /dev/null +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -0,0 +1,154 @@ +import { BaseModule } from '../BaseModule'; +import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; +import { RnExecutorchError } from '../../errors/errorUtils'; + +/** + * Raw pixel data for vision model inference. + */ +export type PixelData = { + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}; + +/** + * VisionCamera Frame object for real-time processing. + */ +export type Frame = { + getNativeBuffer(): { pointer: number; release(): void }; + width: number; + height: number; +}; + +/** + * Base class for computer vision models that support multiple input types. + * + * VisionModule extends BaseModule with: + * - Unified `forward()` API accepting string paths or raw pixel data + * - `runOnFrame` getter for real-time VisionCamera frame processing + * - Shared frame processor creation logic + * + * Subclasses should only implement model-specific loading logic. + * + * @category Typescript API + */ +export abstract class VisionModule extends BaseModule { + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * + * Only available after the model is loaded. Returns null if not loaded. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * @example + * ```typescript + * const model = new ClassificationModule(); + * await model.load({ modelSource: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + */ + get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + // Extract pure JSI function reference (runs on JS thread) + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + + // Return worklet that captures ONLY the JSI function + return (frame: any, ...args: any[]): TOutput => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }; + return nativeGenerateFromFrame(frameData, ...args); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + /** + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `forwardSync` instead. + * This method is async and cannot be called in worklet context. + * + * @param input - Image source (string path or PixelData object) + * @param args - Additional model-specific arguments + * @returns A Promise that resolves to the model output. 
+ * + * @example + * ```typescript + * // String path (async) + * const result1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data (async) + * const result2 = await model.forward({ + * data: pixelBuffer, + * width: 640, + * height: 480, + * channels: 3 + * }); + * + * // For VisionCamera frames, use runOnFrame in worklet: + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * } + * }); + * ``` + */ + async forward(input: string | PixelData, ...args: any[]): Promise { + if (this.nativeModule == null) + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + + // Type detection and routing + if (typeof input === 'string') { + // String path β†’ generateFromString() + return await this.nativeModule.generateFromString(input, ...args); + } else if ( + typeof input === 'object' && + 'data' in input && + input.data instanceof ArrayBuffer && + typeof input.width === 'number' && + typeof input.height === 'number' && + typeof input.channels === 'number' + ) { + // Pixel data β†’ generateFromPixels() + return await this.nativeModule.generateFromPixels(input, ...args); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } + } +} diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 94f7cf5c0..2dddaad64 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -170,14 +170,77 @@ export interface ObjectDetectionType { downloadProgress: number; /** - * Executes the model's forward pass to detect objects within the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. - * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. - * @returns A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. + * Executes the model's forward pass with automatic input type detection. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `processFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.7. + * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. 
+ * + * @example + * ```typescript + * // String path + * const detections1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data + * const detections2 = await model.forward({ + * data: pixelBuffer, + * width: 640, + * height: 480, + * channels: 3 + * }); + * ``` */ forward: ( - imageSource: string, + input: + | string + | { + data: ArrayBuffer; + width: number; + height: number; + channels: number; + }, detectionThreshold?: number ) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @example + * ```typescript + * const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const detections = runOnFrame(frame, 0.5); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. + * @returns Array of Detection objects representing detected items in the frame. + */ + runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; + + /** + * Direct reference to the module instance for advanced use cases. + * Most users should use `forward()` for async processing or `runOnFrame` for real-time frame processing. 
+ */ + moduleInstance: any; } From 6e413ac5c36d39f6f0489487060c09a46b29dbb1 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:10 +0100 Subject: [PATCH 07/37] refactor: errors, logs, unnecessary comments, use existing TensorPtr --- .../app/object_detection/index.tsx | 61 +++++++------- apps/computer-vision/package.json | 9 ++- .../host_objects/JsiConversions.h | 19 +++++ .../host_objects/ModelHostObject.h | 62 +++++++++++++- .../rnexecutorch/models/VisionModel.cpp | 69 +++++++++------- .../rnexecutorch/utils/FrameExtractor.cpp | 63 +++++---------- .../rnexecutorch/utils/FrameProcessor.cpp | 80 +++++-------------- .../rnexecutorch/utils/FrameProcessor.h | 14 ++-- .../src/hooks/useModule.ts | 1 - .../modules/computer_vision/VisionModule.ts | 41 +++------- .../src/types/common.ts | 59 +++++++++----- .../src/types/objectDetection.ts | 6 -- 12 files changed, 255 insertions(+), 229 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 9e60589fb..54c0eb18f 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; import { Images } from 'react-native-nitro-image'; -// Helper function to convert image URI to raw pixel data using NitroImage +// Helper function to convert BGRA to RGB +function convertBGRAtoRGB( + buffer: ArrayBuffer, + width: number, + height: number +): ArrayBuffer { + const source = new Uint8Array(buffer); + const rgb = new Uint8Array(width * height * 3); + + for (let i = 0; i < width * height; i++) { + // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] + rgb[i * 3 + 0] = source[i * 4 + 2]; // R + rgb[i * 3 + 1] = source[i * 4 + 1]; // G + rgb[i * 3 + 2] = source[i * 4 + 0]; // B + } + + return rgb.buffer; +} + +// Helper function to convert image URI to raw RGB pixel data async 
function imageUriToPixelData( uri: string, targetWidth: number, @@ -29,32 +48,19 @@ async function imageUriToPixelData( const image = await Images.loadFromFileAsync(uri); const resized = image.resize(targetWidth, targetHeight); - // Get pixel data as ArrayBuffer (RGBA format) - const pixelData = resized.toRawPixelData(); + // Get pixel data as ArrayBuffer (BGRA format from NitroImage) + const rawPixelData = resized.toRawPixelData(); const buffer = - pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; - - // Calculate actual buffer dimensions (accounts for device pixel ratio) - const bufferSize = buffer?.byteLength || 0; - const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel - const aspectRatio = targetWidth / targetHeight; - const actualHeight = Math.sqrt(totalPixels / aspectRatio); - const actualWidth = totalPixels / actualHeight; + rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - console.log('Requested:', targetWidth, 'x', targetHeight); - console.log('Buffer size:', bufferSize); - console.log( - 'Actual dimensions:', - Math.round(actualWidth), - 'x', - Math.round(actualHeight) - ); + // Convert BGRA to RGB as required by the native API + const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); return { - data: buffer, - width: Math.round(actualWidth), - height: Math.round(actualHeight), - channels: 4, // RGBA + data: rgbBuffer, + width: targetWidth, + height: targetHeight, + channels: 3, // RGB }; } catch (error) { console.error('Error loading image with NitroImage:', error); @@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() { if (imageUri && imageDimensions) { try { console.log('Converting image to pixel data...'); - // Resize to 640x640 to avoid memory issues - const intermediateSize = 640; + // Use original dimensions - let the model resize internally const pixelData = await imageUriToPixelData( imageUri, - intermediateSize, - intermediateSize + imageDimensions.width, + 
imageDimensions.height ); console.log('Running forward with pixel data...', { @@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() { }); // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.5); + const output = await ssdLite.forward(pixelData, 0.3); console.log('Pixel data result:', output.length, 'detections'); setResults(output); } catch (e) { diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index cce918197..3f47c357c 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -17,6 +17,7 @@ "@react-navigation/native": "^7.1.6", "@shopify/react-native-skia": "2.2.12", "expo": "^54.0.27", + "expo-build-properties": "~1.0.10", "expo-constants": "~18.0.11", "expo-font": "~14.0.10", "expo-linking": "~8.0.10", @@ -30,17 +31,19 @@ "react-native-gesture-handler": "~2.28.0", "react-native-image-picker": "^7.2.2", "react-native-loading-spinner-overlay": "^3.0.1", - "react-native-reanimated": "~4.1.1", + "react-native-nitro-image": "0.10.2", + "react-native-nitro-modules": "0.33.4", + "react-native-reanimated": "~4.2.1", "react-native-safe-area-context": "~5.6.0", "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", - "react-native-worklets": "0.5.1" + "react-native-worklets": "^0.7.2" }, "devDependencies": { "@babel/core": "^7.25.2", "@types/pngjs": "^6.0.5", - "@types/react": "~19.1.10" + "@types/react": "~19.2.0" }, "private": true } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index df9abbdef..b4409b0f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -360,6 +360,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime 
&runtime) { return {runtime, bigInt}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + // JS numbers are doubles. Large uint64s > 2^53 will lose precision. + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 9a2e6776e..9554f1888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -171,14 +171,12 @@ template class ModelHostObject : public JsiHostObject { "stream")); } - // Register generateFromFrame for all VisionModel subclasses if constexpr (meta::DerivedFromOrSameAs) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, synchronousHostFunction<&Model::streamStop>, "streamStop")); } - // Register generateFromPixels for models that support it if constexpr (meta::HasGenerateFromPixels) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, @@ -234,6 +232,66 @@ template class ModelHostObject : public JsiHostObject { } } + template JSI_HOST_FUNCTION(visionHostFunction) { + // 1. 
Check Argument Count + // (We rely on our new FunctionTraits) + constexpr std::size_t cppArgCount = + meta::FunctionTraits::arity; + + // We expect JS args = (Total C++ Args) - (2 injected args: Runtime + Value) + constexpr std::size_t expectedJsArgs = cppArgCount - 1; + log(LOG_LEVEL::Debug, cppArgCount, count); + if (count != expectedJsArgs) { + throw jsi::JSError(runtime, "Argument count mismatch in vision function"); + } + + try { + // 2. The Magic Trick + // We get a pointer to a dummy function: void dummy(Rest...) {} + // This function has exactly the signature of the arguments we want to + // parse. + auto dummyFuncPtr = &meta::TailSignature::dummy; + + // 3. Let existing helpers do the work + // We pass the dummy pointer. The helper inspects its arguments (Rest...) + // and converts args[0]...args[N] accordingly. + // Note: We pass (args + 1) because JS args[0] is the PixelData, which we + // handle manually. Note: We use expectedJsArgs - 1 because we skipped one + // JS arg. + auto tailArgsTuple = + meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime); + + // 4. Invoke + using ReturnType = + typename meta::FunctionTraits::return_type; + + if constexpr (std::is_void_v) { + std::apply( + [&](auto &&...tailArgs) { + (model.get()->*FnPtr)( + runtime, + args[0], // 1. PixelData (Manually passed) + std::forward( + tailArgs)...); // 2. The rest (Auto parsed) + }, + std::move(tailArgsTuple)); + return jsi::Value::undefined(); + } else { + auto result = std::apply( + [&](auto &&...tailArgs) { + return (model.get()->*FnPtr)( + runtime, args[0], + std::forward(tailArgs)...); + }, + std::move(tailArgsTuple)); + + return jsi_conversion::getJsiValue(std::move(result), runtime); + } + } catch (const std::exception &e) { + throw jsi::JSError(runtime, e.what()); + } + } + // A generic host function that resolves a promise with a result of a // function. 
JSI arguments are converted to the types provided in the function // signature, and the return value is converted back to JSI before resolving. diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 54c0adfd2..fd2c40ee8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -1,4 +1,9 @@ #include "VisionModel.h" +#include +#include +#include +#include +#include #include namespace rnexecutorch { @@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, const jsi::Object &pixelData) const { - // Extract width, height, and channels - if (!pixelData.hasProperty(runtime, "width") || - !pixelData.hasProperty(runtime, "height") || - !pixelData.hasProperty(runtime, "channels") || - !pixelData.hasProperty(runtime, "data")) { - throw std::runtime_error( - "Invalid pixel data: must contain width, height, channels, and data"); - } - - int width = pixelData.getProperty(runtime, "width").asNumber(); - int height = pixelData.getProperty(runtime, "height").asNumber(); - int channels = pixelData.getProperty(runtime, "channels").asNumber(); + // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType) + // Use JSI conversion helper to extract the data + auto tensorView = jsi::fromHostObject(runtime, pixelData); - // Get the ArrayBuffer - auto dataValue = pixelData.getProperty(runtime, "data"); - if (!dataValue.isObject() || - !dataValue.asObject(runtime).isArrayBuffer(runtime)) { - throw std::runtime_error( - "pixel data 'data' property must be an ArrayBuffer"); + // Validate dimensions: sizes must be [height, width, channels] + if (tensorView.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel 
data: sizes must have 3 elements " + "[height, width, channels], got %zu", + tensorView.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime); - size_t expectedSize = width * height * channels; + int height = tensorView.sizes[0]; + int width = tensorView.sizes[1]; + int channels = tensorView.sizes[2]; - if (arrayBuffer.size(runtime) != expectedSize) { - throw std::runtime_error( - "ArrayBuffer size does not match width * height * channels"); + // Pixel data must be RGB (3 channels) and BYTE type + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - // Create cv::Mat and copy the data - // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let - // preprocessFrame handle conversion - int cvType = (channels == 3) ? 
CV_8UC3 : CV_8UC4; - cv::Mat image(height, width, cvType); + if (tensorView.scalarType != ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } - // Copy data from ArrayBuffer to cv::Mat - std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize); + // Create cv::Mat directly from dataPtr (zero-copy view) + uint8_t *dataPtr = static_cast(tensorView.dataPtr); + cv::Mat image(height, width, CV_8UC3, dataPtr); - return image; + // Clone to own the data, since JS memory may be GC'd + return image.clone(); } } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index f64855131..9fbbaeb74 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -1,4 +1,6 @@ #include "FrameExtractor.h" +#include +#include #include #ifdef __APPLE__ @@ -20,7 +22,8 @@ cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw std::runtime_error("NativeBuffer not supported on this platform"); + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "NativeBuffer not supported on this platform"); #endif } @@ -40,41 +43,25 @@ cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { cv::Mat mat; - // Log pixel format once for debugging - static bool loggedPixelFormat = false; - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "CVPixelBuffer format code: ", pixelFormat); - loggedPixelFormat = true; - } - if (pixelFormat == kCVPixelFormatType_32BGRA) { // BGRA format (most common on iOS when using pixelFormat: 'rgb') - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: 
BGRA format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_32RGBA) { // RGBA format - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGBA format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_24RGB) { // RGB format - if (!loggedPixelFormat) { - log(LOG_LEVEL::Debug, "Extracting from CVPixelBuffer: RGB format, ", - width, "x", height, ", stride: ", bytesPerRow); - } mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, baseAddress, bytesPerRow); } else { CVPixelBufferUnlockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); - throw std::runtime_error("Unsupported CVPixelBuffer format: " + - std::to_string(pixelFormat)); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported CVPixelBuffer format: %u", pixelFormat); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle @@ -99,50 +86,36 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); if (lockResult != 0) { - throw std::runtime_error("Failed to lock AHardwareBuffer"); + throw RnExecutorchError(RnExecutorchErrorCode::AccessFailed, + "Failed to lock AHardwareBuffer"); } cv::Mat mat; - // Log format once for debugging - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "AHardwareBuffer format code: ", desc.format); - loggedFormat = true; - } - if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { // RGBA format (expected when using pixelFormat: 'rgb' on Android) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from 
AHardwareBuffer: RGBA format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 4); - } mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { // RGBX format (treated as RGBA) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGBX format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 4); - } mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { // RGB format (less common) - if (!loggedFormat) { - log(LOG_LEVEL::Debug, "Extracting from AHardwareBuffer: RGB format, ", - desc.width, "x", desc.height, ", stride: ", desc.stride * 3); - } mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); } else { AHardwareBuffer_unlock(buffer, nullptr); - throw std::runtime_error("Unsupported AHardwareBuffer format: " + - std::to_string(desc.format)); + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported AHardwareBuffer format: %u", desc.format); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw std::runtime_error("AHardwareBuffer requires Android API 26+"); + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } #endif // __ANDROID__ diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 02faa072d..087aec816 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -1,7 +1,8 @@ #include "FrameProcessor.h" #include "FrameExtractor.h" +#include +#include 
#include -#include namespace rnexecutorch { namespace utils { @@ -16,34 +17,21 @@ cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, // Try zero-copy path first (nativeBuffer) if (hasNativeBuffer(runtime, frameData)) { - static bool loggedPath = false; - if (!loggedPath) { - log(LOG_LEVEL::Debug, "FrameProcessor: Using zero-copy nativeBuffer"); - loggedPath = true; - } - try { - return extractFromNativeBuffer(runtime, frameData, width, height); + return extractFromNativeBuffer(runtime, frameData); } catch (const std::exception &e) { - log(LOG_LEVEL::Debug, - "FrameProcessor: nativeBuffer extraction failed: ", e.what()); - log(LOG_LEVEL::Debug, "FrameProcessor: Falling back to ArrayBuffer"); + // Fallback to ArrayBuffer on failure } } // Fallback to ArrayBuffer path (with copy) if (frameData.hasProperty(runtime, "data")) { - static bool loggedPath = false; - if (!loggedPath) { - log(LOG_LEVEL::Debug, "FrameProcessor: Using ArrayBuffer (with copy)"); - loggedPath = true; - } - return extractFromArrayBuffer(runtime, frameData, width, height); } // No valid frame data source - throw std::runtime_error( + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, "FrameProcessor: No valid frame data (neither nativeBuffer nor data " "property found)"); } @@ -52,8 +40,9 @@ cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, const jsi::Object &frameData) { if (!frameData.hasProperty(runtime, "width") || !frameData.hasProperty(runtime, "height")) { - throw std::runtime_error("FrameProcessor: Frame data missing width or " - "height property"); + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "FrameProcessor: Frame data missing width or height property"); } int width = @@ -70,8 +59,7 @@ bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, } cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height) { + const jsi::Object &frameData) { auto 
nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); // Handle bigint pointer value from JavaScript @@ -79,15 +67,8 @@ cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, nativeBufferValue.asBigInt(runtime).asUint64(runtime)); // Use FrameExtractor to get cv::Mat from platform-specific buffer - cv::Mat frame = FrameExtractor::extractFromNativeBuffer(bufferPtr); - - // Validate extracted frame dimensions match expected - if (frame.cols != width || frame.rows != height) { - log(LOG_LEVEL::Debug, "FrameProcessor: Dimension mismatch - expected ", - width, "x", height, " but got ", frame.cols, "x", frame.rows); - } - - return frame; + // Native buffer contains all metadata (width, height, format) + return FrameExtractor::extractFromNativeBuffer(bufferPtr); } cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, @@ -103,39 +84,22 @@ cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, size_t expectedRGBAStride = width * 4; size_t expectedRGBStride = width * 3; - cv::Mat frame; - if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { // RGBA format with potential padding - frame = cv::Mat(height, width, CV_8UC4, data, stride); - - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, - "FrameProcessor: ArrayBuffer format is RGBA, " - "stride: ", - stride); - loggedFormat = true; - } + return cv::Mat(height, width, CV_8UC4, data, stride); } else if (stride >= expectedRGBStride) { // RGB format - frame = cv::Mat(height, width, CV_8UC3, data, stride); - - static bool loggedFormat = false; - if (!loggedFormat) { - log(LOG_LEVEL::Debug, - "FrameProcessor: ArrayBuffer format is RGB, stride: ", stride); - loggedFormat = true; - } + return cv::Mat(height, width, CV_8UC3, data, stride); } else { - throw std::runtime_error( - "FrameProcessor: Unexpected buffer size - expected " + - std::to_string(expectedRGBStride) + " or " + - std::to_string(expectedRGBAStride) + " bytes 
per row, got " + - std::to_string(stride)); + char errorMessage[200]; + std::snprintf( + errorMessage, sizeof(errorMessage), + "FrameProcessor: Unexpected buffer size - expected %zu or %zu bytes " + "per row, got %zu", + expectedRGBStride, expectedRGBAStride, stride); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); } - - return frame; } } // namespace utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index e37b5bfd6..0838b6594 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -45,8 +45,8 @@ class FrameProcessor { * * @return cv::Mat wrapping or containing the frame data * - * @throws std::runtime_error if neither nativeBuffer nor data is available - * @throws std::runtime_error if nativeBuffer extraction fails + * @throws RnExecutorchError if neither nativeBuffer nor data is available + * @throws RnExecutorchError if nativeBuffer extraction fails * * @note The returned cv::Mat may not own the data (zero-copy path). * Caller must ensure the source frame remains valid during use. @@ -62,7 +62,7 @@ class FrameProcessor { * * @return cv::Size with frame width and height * - * @throws std::runtime_error if width or height properties are missing + * @throws RnExecutorchError if width or height properties are missing */ static cv::Size getFrameSize(jsi::Runtime &runtime, const jsi::Object &frameData); @@ -81,15 +81,15 @@ class FrameProcessor { /** * @brief Extract frame from nativeBuffer pointer (zero-copy) * + * Native buffer contains all metadata (width, height, format), so no need to + * pass dimensions separately. 
+ * * @param runtime JSI runtime * @param frameData JSI object with nativeBuffer property - * @param width Frame width - * @param height Frame height * @return cv::Mat wrapping the native buffer data */ static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height); + const jsi::Object &frameData); /** * @brief Extract frame from ArrayBuffer (with copy) diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 624094afb..f5f260787 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -6,7 +6,6 @@ interface Module { load: (...args: any[]) => Promise; forward: (...args: any[]) => Promise; delete: () => void; - nativeModule?: any; // JSI host object with native methods } interface ModuleConstructor { diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index 06acf6654..72e797437 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -1,25 +1,7 @@ import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError } from '../../errors/errorUtils'; - -/** - * Raw pixel data for vision model inference. - */ -export type PixelData = { - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}; - -/** - * VisionCamera Frame object for real-time processing. - */ -export type Frame = { - getNativeBuffer(): { pointer: number; release(): void }; - width: number; - height: number; -}; +import { Frame, PixelData, ScalarType } from '../../types/common'; /** * Base class for computer vision models that support multiple input types. 
@@ -74,8 +56,6 @@ export abstract class VisionModule extends BaseModule { nativeBuffer = frame.getNativeBuffer(); const frameData = { nativeBuffer: nativeBuffer.pointer, - width: frame.width, - height: frame.height, }; return nativeGenerateFromFrame(frameData, ...args); } finally { @@ -107,10 +87,9 @@ export abstract class VisionModule extends BaseModule { * * // Pixel data (async) * const result2 = await model.forward({ - * data: pixelBuffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: new Uint8Array(pixelBuffer), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }); * * // For VisionCamera frames, use runOnFrame in worklet: @@ -136,11 +115,13 @@ export abstract class VisionModule extends BaseModule { return await this.nativeModule.generateFromString(input, ...args); } else if ( typeof input === 'object' && - 'data' in input && - input.data instanceof ArrayBuffer && - typeof input.width === 'number' && - typeof input.height === 'number' && - typeof input.channels === 'number' + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE ) { // Pixel data β†’ generateFromPixels() return await this.nativeModule.generateFromPixels(input, ...args); diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 439e18597..7a3ca8afc 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -151,6 +151,45 @@ export type LabelEnum = Readonly>; * @category Types */ export type Triple = readonly [T, T, T]; +/** + * Represents raw pixel data in RGB format for vision models. 
+ * + * This type extends TensorPtr with constraints specific to image data: + * - dataPtr must be Uint8Array (8-bit unsigned integers) + * - scalarType is always BYTE (ScalarType.BYTE) + * - sizes represents [height, width, channels] where channels must be 3 (RGB) + * + * @category Types + * @example + * ```typescript + * const pixelData: PixelData = { + * dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + * sizes: [height, width, 3], // [height, width, channels] + * scalarType: ScalarType.BYTE + * }; + * ``` + */ +export interface PixelData extends Omit { + /** + * RGB pixel data as Uint8Array. + * Expected format: RGB (3 channels), not RGBA or BGRA. + * Size must equal: width * height * 3 + */ + dataPtr: Uint8Array; + + /** + * Dimensions of the pixel data: [height, width, channels]. + * - sizes[0]: height (number of rows) + * - sizes[1]: width (number of columns) + * - sizes[2]: channels (must be 3 for RGB) + */ + sizes: [number, number, 3]; + + /** + * Scalar type is always BYTE for pixel data. + */ + scalarType: ScalarType.BYTE; +} /** * Frame data for vision model processing. @@ -158,13 +197,7 @@ export type Triple = readonly [T, T, T]; * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 */ -export interface FrameData { - /** - * Raw pixel data as ArrayBuffer (requires memory copy). - * Use this for compatibility or when getNativeBuffer is not available. - */ - data?: ArrayBuffer | ArrayBufferLike; - +export interface Frame { /** * Pointer to native platform buffer (zero-copy, best performance). 
* - On iOS: CVPixelBufferRef pointer @@ -172,15 +205,5 @@ export interface FrameData { * * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` */ - nativeBuffer?: bigint; - - /** - * Frame width in pixels - */ - width: number; - - /** - * Frame height in pixels - */ - height: number; + getNativeBuffer(): { pointer: number; release(): void }; } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 2dddaad64..abb0142a7 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -237,10 +237,4 @@ export interface ObjectDetectionType { * @returns Array of Detection objects representing detected items in the frame. */ runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; - - /** - * Direct reference to the module instance for advanced use cases. - * Most users should use `forward()` for async processing or `runOnFrame` for real-time frame processing. 
- */ - moduleInstance: any; } From 53bcd96ae50f327859a6ae96a6c04609aa2bde05 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:37 +0100 Subject: [PATCH 08/37] fix: change Frame import in BaseModule --- packages/react-native-executorch/src/modules/BaseModule.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/react-native-executorch/src/modules/BaseModule.ts b/packages/react-native-executorch/src/modules/BaseModule.ts index 0870a30b6..41a2da6cf 100644 --- a/packages/react-native-executorch/src/modules/BaseModule.ts +++ b/packages/react-native-executorch/src/modules/BaseModule.ts @@ -1,4 +1,4 @@ -import { ResourceSource } from '../types/common'; +import { Frame, ResourceSource } from '../types/common'; import { TensorPtr } from '../types/common'; /** @@ -51,9 +51,9 @@ export abstract class BaseModule { * @param args Additional model-specific arguments (e.g., threshold, options) * @returns Model-specific output (e.g., detections, classifications, embeddings) * - * @see {@link FrameData} for frame data format details + * @see {@link Frame} for frame data format details */ - public generateFromFrame!: (frameData: FrameData, ...args: any[]) => any; + public generateFromFrame!: (frameData: Frame, ...args: any[]) => any; /** * Load the model and prepare it for inference. 
From cd0b123c76f95785d3ad860c187e4e6f9aae15db Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 12:49:15 +0100 Subject: [PATCH 09/37] feat: use TensorPtrish type for Pixel data input --- .../app/object_detection/index.tsx | 119 ++++++------------ .../rnexecutorch/models/VisionModel.cpp | 13 +- .../common/rnexecutorch/models/VisionModel.h | 29 ++--- .../rnexecutorch/utils/FrameExtractor.cpp | 2 +- .../rnexecutorch/utils/FrameProcessor.cpp | 12 +- .../modules/computer_vision/VisionModule.ts | 2 +- .../src/types/common.ts | 2 +- .../src/types/objectDetection.ts | 22 ++-- 8 files changed, 72 insertions(+), 129 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 54c0eb18f..d843682eb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -4,6 +4,8 @@ import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, + ScalarType, + PixelData, } from 'react-native-executorch'; import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; @@ -11,62 +13,6 @@ import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; -import { Images } from 'react-native-nitro-image'; - -// Helper function to convert BGRA to RGB -function convertBGRAtoRGB( - buffer: ArrayBuffer, - width: number, - height: number -): ArrayBuffer { - const source = new Uint8Array(buffer); - const rgb = new Uint8Array(width * height * 3); - - for (let i = 0; i < width * height; i++) { - // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] - rgb[i * 3 + 0] = source[i * 4 + 2]; // R - rgb[i * 3 + 1] = source[i * 4 + 1]; // G - rgb[i * 3 + 2] = source[i * 4 + 0]; // B - } - - return rgb.buffer; -} - -// Helper 
function to convert image URI to raw RGB pixel data -async function imageUriToPixelData( - uri: string, - targetWidth: number, - targetHeight: number -): Promise<{ - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}> { - try { - // Load image and resize to target dimensions - const image = await Images.loadFromFileAsync(uri); - const resized = image.resize(targetWidth, targetHeight); - - // Get pixel data as ArrayBuffer (BGRA format from NitroImage) - const rawPixelData = resized.toRawPixelData(); - const buffer = - rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - - // Convert BGRA to RGB as required by the native API - const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); - - return { - data: rgbBuffer, - width: targetWidth, - height: targetHeight, - channels: 3, // RGB - }; - } catch (error) { - console.error('Error loading image with NitroImage:', error); - throw error; - } -} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -109,30 +55,45 @@ export default function ObjectDetectionScreen() { }; const runForwardPixels = async () => { - if (imageUri && imageDimensions) { - try { - console.log('Converting image to pixel data...'); - // Use original dimensions - let the model resize internally - const pixelData = await imageUriToPixelData( - imageUri, - imageDimensions.width, - imageDimensions.height - ); - - console.log('Running forward with pixel data...', { - width: pixelData.width, - height: pixelData.height, - channels: pixelData.channels, - dataSize: pixelData.data.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForwardPixels:', e); + try { + console.log('Testing with hardcoded pixel data...'); + + // Create a simple 320x320 test image 
(all zeros - black image) + // In a real scenario, you would load actual image pixel data here + const width = 320; + const height = 320; + const channels = 3; // RGB + + // Create a black image (you can replace this with actual pixel data) + const rgbData = new Uint8Array(width * height * channels); + + // Optionally, add some test pattern (e.g., white square in center) + for (let y = 100; y < 220; y++) { + for (let x = 100; x < 220; x++) { + const idx = (y * width + x) * 3; + rgbData[idx + 0] = 255; // R + rgbData[idx + 1] = 255; // G + rgbData[idx + 2] = 255; // B + } } + + const pixelData: PixelData = { + dataPtr: rgbData, + sizes: [height, width, channels], + scalarType: ScalarType.BYTE, + }; + + console.log('Running forward with hardcoded pixel data...', { + sizes: pixelData.sizes, + dataSize: pixelData.dataPtr.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.3); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index fd2c40ee8..8155b8819 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -2,8 +2,6 @@ #include #include #include -#include -#include #include namespace rnexecutorch { @@ -21,12 +19,7 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, return preprocessFrame(frame); } -cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, - const jsi::Object &pixelData) const { - // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType) - // Use JSI conversion helper to extract the data - auto tensorView = jsi::fromHostObject(runtime, pixelData); - +cv::Mat 
VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { // Validate dimensions: sizes must be [height, width, channels] if (tensorView.sizes.size() != 3) { char errorMessage[100]; @@ -59,11 +52,11 @@ cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime, } // Create cv::Mat directly from dataPtr (zero-copy view) + // Data is valid for the duration of this synchronous call uint8_t *dataPtr = static_cast(tensorView.dataPtr); cv::Mat image(height, width, CV_8UC3, dataPtr); - // Clone to own the data, since JS memory may be GC'd - return image.clone(); + return image; } } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 9ba5cf7e4..c362d745f 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -129,40 +129,35 @@ class VisionModel : public BaseModel { const jsi::Value &frameData) const; /** - * @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from + * @brief Extract cv::Mat from raw pixel data (TensorPtr) sent from * JavaScript * * This method enables users to run inference on raw pixel data without file * I/O. Useful for processing images already in memory (e.g., from canvas, * image library). 
* - * @param runtime JSI runtime - * @param pixelData JSI object containing: - * - data: ArrayBuffer with raw pixel values - * - width: number - image width - * - height: number - image height - * - channels: number - number of channels (3 for RGB, 4 for - * RGBA) + * @param tensorView JSTensorViewIn containing: + * - dataPtr: Pointer to raw pixel values (RGB format) + * - sizes: [height, width, channels] - must be 3D + * - scalarType: Must be ScalarType::Byte (Uint8Array) * * @return cv::Mat containing the pixel data * - * @throws std::runtime_error if pixelData format is invalid + * @throws RnExecutorchError if tensorView format is invalid * * @note The returned cv::Mat owns a copy of the data - * @note Expected pixel format: RGB or RGBA, row-major order + * @note Expected pixel format: RGB (3 channels), row-major order * @note Typical usage from JS: * @code - * const pixels = new Uint8Array([...]); // Raw pixel data + * const pixels = new Uint8Array([...]); // Raw RGB pixel data * const result = model.generateFromPixels({ - * data: pixels.buffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: pixels, + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }, 0.5); * @endcode */ - cv::Mat extractFromPixels(jsi::Runtime &runtime, - const jsi::Object &pixelData) const; + cv::Mat extractFromPixels(const JSTensorViewIn &tensorView) const; }; } // namespace models diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 9fbbaeb74..900eae297 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -86,7 +86,7 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); if (lockResult != 0) { - throw 
RnExecutorchError(RnExecutorchErrorCode::AccessFailed, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, "Failed to lock AHardwareBuffer"); } diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 087aec816..5e593dfd0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -9,13 +9,8 @@ namespace utils { cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { - // Get frame dimensions - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - // Try zero-copy path first (nativeBuffer) + // Native buffer contains dimensions, so we don't need width/height properties if (hasNativeBuffer(runtime, frameData)) { try { return extractFromNativeBuffer(runtime, frameData); @@ -25,7 +20,12 @@ cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, } // Fallback to ArrayBuffer path (with copy) + // Get frame dimensions for ArrayBuffer path if (frameData.hasProperty(runtime, "data")) { + int width = + static_cast(frameData.getProperty(runtime, "width").asNumber()); + int height = + static_cast(frameData.getProperty(runtime, "height").asNumber()); return extractFromArrayBuffer(runtime, frameData, width, height); } diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index 72e797437..d6a0038ee 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -73,7 +73,7 @@ export abstract class VisionModule extends BaseModule { * 1. 
**String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * - * **Note**: For VisionCamera frame processing, use `forwardSync` instead. + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. * This method is async and cannot be called in worklet context. * * @param input - Image source (string path or PixelData object) diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 7a3ca8afc..1ebfb3534 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -205,5 +205,5 @@ export interface Frame { * * Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` */ - getNativeBuffer(): { pointer: number; release(): void }; + getNativeBuffer(): { pointer: bigint; release(): void }; } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index abb0142a7..c2281598a 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Represents a bounding box for a detected object in an image. 
@@ -190,22 +190,14 @@ export interface ObjectDetectionType { * * // Pixel data * const detections2 = await model.forward({ - * data: pixelBuffer, - * width: 640, - * height: 480, - * channels: 3 + * dataPtr: new Uint8Array(rgbPixels), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE * }); * ``` */ forward: ( - input: - | string - | { - data: ArrayBuffer; - width: number; - height: number; - channels: number; - }, + input: string | PixelData, detectionThreshold?: number ) => Promise; @@ -236,5 +228,7 @@ export interface ObjectDetectionType { * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. * @returns Array of Detection objects representing detected items in the frame. */ - runOnFrame: ((frame: any, detectionThreshold?: number) => Detection[]) | null; + runOnFrame: + | ((frame: Frame, detectionThreshold?: number) => Detection[]) + | null; } From e001142fee1b3fa946efbc2ae387f31c11d670fe Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 13:03:22 +0100 Subject: [PATCH 10/37] refactor: add or remove empty lines --- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../rnexecutorch/models/embeddings/image/ImageEmbeddings.h | 2 +- .../models/image_segmentation/BaseImageSegmentation.h | 2 +- .../models/image_segmentation/ImageSegmentation.cpp | 2 +- .../common/rnexecutorch/models/style_transfer/StyleTransfer.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index b9fad1b88..0fba07108 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // 
namespace rnexecutorch::models::classification \ No newline at end of file +} // namespace rnexecutorch::models::classification diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 9a1d6429b..7e114e939 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index 34ad8dffd..f46f41d69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp index 08f2a4683..a2c1ae865 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp @@ -167,4 +167,4 @@ std::shared_ptr ImageSegmentation::populateDictionary( return dictPtr; } -} // namespace rnexecutorch::models::image_segmentation \ No newline at end of file +} // namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 8eed3c888..73744c4d8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch From ca60d88c18305b19a1a1eec33fe514fc6d16067b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 19 Feb 2026 22:34:20 +0100 Subject: [PATCH 11/37] fix: errors after rebase --- .../host_objects/JsiConversions.h | 10 - .../host_objects/ModelHostObject.h | 7 +- .../metaprogramming/FunctionHelpers.h | 67 +++- .../metaprogramming/TypeConcepts.h | 5 + yarn.lock | 311 ++++++++++++++++-- 5 files changed, 364 insertions(+), 36 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index b4409b0f2..5fc8615ea 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -360,16 +360,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const 
std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - // JS numbers are doubles. Large uint64s > 2^53 will lose precision. - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 9554f1888..3190bc6f4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -171,10 +172,10 @@ template class ModelHostObject : public JsiHostObject { "stream")); } - if constexpr (meta::DerivedFromOrSameAs) { + if constexpr (meta::HasGenerateFromFrame) { addFunctions(JSI_EXPORT_FUNCTION( - ModelHostObject, synchronousHostFunction<&Model::streamStop>, - "streamStop")); + ModelHostObject, visionHostFunction<&Model::generateFromFrame>, + "generateFromFrame")); } if constexpr (meta::HasGenerateFromPixels) { diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index 8290a810b..a48aa0119 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -3,12 +3,39 @@ #include #include #include +#include #include namespace rnexecutorch::meta { using namespace facebook; +// ========================================================================= +// 1. 
Function Traits (Extracts Arity, Return Type, Args) +// ========================================================================= + +template struct FunctionTraits; + +// Specialization for Member Functions +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +// Specialization for const Member Functions +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +// ========================================================================= +// 2. Argument Counting Helpers +// ========================================================================= + template constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) { return sizeof...(Types); @@ -19,6 +46,10 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...) const) { return sizeof...(Types); } +// ========================================================================= +// 3. JSI -> Tuple Conversion Logic +// ========================================================================= + template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, @@ -31,7 +62,6 @@ std::tuple fillTupleFromArgs(std::index_sequence, * arguments for method supplied with a pointer. The types in the tuple are * inferred from the method pointer. */ - template std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...), const jsi::Value *args, @@ -47,4 +77,37 @@ std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...) 
const, return fillTupleFromArgs(std::index_sequence_for{}, args, runtime); } -} // namespace rnexecutorch::meta \ No newline at end of file + +// Overload for free functions (used by TailSignature dummy) +template +std::tuple createArgsTupleFromJsi(void (*f)(Types...), + const jsi::Value *args, + jsi::Runtime &runtime) { + return fillTupleFromArgs(std::index_sequence_for{}, args, + runtime); +} + +// ========================================================================= +// 4. Tail Signature Helper (Crucial for Vision Functions) +// ========================================================================= + +// Extracts the "Tail" arguments of a function signature, skipping the first +// two arguments (Runtime and FrameValue). +template struct TailSignature; + +// Non-const member function specialization +template +struct TailSignature { + // A dummy function that has the signature of just the "Rest" arguments. + static void dummy(Rest...) {} +}; + +// Const member function specialization +template +struct TailSignature { + static void dummy(Rest...) 
{} +}; + +} // namespace rnexecutorch::meta diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 8100a471b..f625bf6e7 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -21,6 +21,11 @@ concept HasGenerateFromPixels = requires(T t) { { &T::generateFromPixels }; }; +template +concept HasGenerateFromFrame = requires(T t) { + { &T::generateFromFrame }; +}; + template concept HasEncode = requires(T t) { { &T::encode }; diff --git a/yarn.lock b/yarn.lock index 436005c8d..3d2d9f7ee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -53,6 +53,17 @@ __metadata: languageName: node linkType: hard +"@babel/code-frame@npm:^7.28.6, @babel/code-frame@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/code-frame@npm:7.29.0" + dependencies: + "@babel/helper-validator-identifier": "npm:^7.28.5" + js-tokens: "npm:^4.0.0" + picocolors: "npm:^1.1.1" + checksum: 10/199e15ff89007dd30675655eec52481cb245c9fdf4f81e4dc1f866603b0217b57aff25f5ffa0a95bbc8e31eb861695330cd7869ad52cc211aa63016320ef72c5 + languageName: node + linkType: hard + "@babel/compat-data@npm:^7.20.5, @babel/compat-data@npm:^7.27.2, @babel/compat-data@npm:^7.27.7, @babel/compat-data@npm:^7.28.5": version: 7.28.5 resolution: "@babel/compat-data@npm:7.28.5" @@ -110,6 +121,19 @@ __metadata: languageName: node linkType: hard +"@babel/generator@npm:^7.29.0": + version: 7.29.1 + resolution: "@babel/generator@npm:7.29.1" + dependencies: + "@babel/parser": "npm:^7.29.0" + "@babel/types": "npm:^7.29.0" + "@jridgewell/gen-mapping": "npm:^0.3.12" + "@jridgewell/trace-mapping": "npm:^0.3.28" + jsesc: "npm:^3.0.2" + checksum: 10/61fe4ddd6e817aa312a14963ccdbb5c9a8c57e8b97b98d19a8a99ccab2215fda1a5f52bc8dd8d2e3c064497ddeb3ab8ceb55c76fa0f58f8169c34679d2256fe0 + languageName: 
node + linkType: hard + "@babel/helper-annotate-as-pure@npm:^7.27.1, @babel/helper-annotate-as-pure@npm:^7.27.3": version: 7.27.3 resolution: "@babel/helper-annotate-as-pure@npm:7.27.3" @@ -149,6 +173,23 @@ __metadata: languageName: node linkType: hard +"@babel/helper-create-class-features-plugin@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-create-class-features-plugin@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-member-expression-to-functions": "npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/helper-replace-supers": "npm:^7.28.6" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + semver: "npm:^6.3.1" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/11f55607fcf66827ade745c0616aa3c6086aa655c0fab665dd3c4961829752e4c94c942262db30c4831ef9bce37ad444722e85ef1b7136587e28c6b1ef8ad43c + languageName: node + linkType: hard + "@babel/helper-create-regexp-features-plugin@npm:^7.18.6, @babel/helper-create-regexp-features-plugin@npm:^7.27.1": version: 7.28.5 resolution: "@babel/helper-create-regexp-features-plugin@npm:7.28.5" @@ -242,6 +283,13 @@ __metadata: languageName: node linkType: hard +"@babel/helper-plugin-utils@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-plugin-utils@npm:7.28.6" + checksum: 10/21c853bbc13dbdddf03309c9a0477270124ad48989e1ad6524b83e83a77524b333f92edd2caae645c5a7ecf264ec6d04a9ebe15aeb54c7f33c037b71ec521e4a + languageName: node + linkType: hard + "@babel/helper-remap-async-to-generator@npm:^7.18.9, @babel/helper-remap-async-to-generator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-remap-async-to-generator@npm:7.27.1" @@ -268,6 +316,19 @@ __metadata: languageName: node linkType: hard +"@babel/helper-replace-supers@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/helper-replace-supers@npm:7.28.6" + dependencies: + "@babel/helper-member-expression-to-functions": 
"npm:^7.28.5" + "@babel/helper-optimise-call-expression": "npm:^7.27.1" + "@babel/traverse": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0 + checksum: 10/ad2724713a4d983208f509e9607e8f950855f11bd97518a700057eb8bec69d687a8f90dc2da0c3c47281d2e3b79cf1d14ecf1fe3e1ee0a8e90b61aee6759c9a7 + languageName: node + linkType: hard + "@babel/helper-skip-transparent-expression-wrappers@npm:^7.20.0, @babel/helper-skip-transparent-expression-wrappers@npm:^7.27.1": version: 7.27.1 resolution: "@babel/helper-skip-transparent-expression-wrappers@npm:7.27.1" @@ -343,6 +404,17 @@ __metadata: languageName: node linkType: hard +"@babel/parser@npm:^7.28.6, @babel/parser@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/parser@npm:7.29.0" + dependencies: + "@babel/types": "npm:^7.29.0" + bin: + parser: ./bin/babel-parser.js + checksum: 10/b1576dca41074997a33ee740d87b330ae2e647f4b7da9e8d2abd3772b18385d303b0cee962b9b88425e0f30d58358dbb8d63792c1a2d005c823d335f6a029747 + languageName: node + linkType: hard + "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-bugfix-firefox-class-in-computed-class-key@npm:7.28.5" @@ -767,6 +839,17 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-syntax-typescript@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/plugin-syntax-typescript@npm:7.28.6" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/5c55f9c63bd36cf3d7e8db892294c8f85000f9c1526c3a1cc310d47d1e174f5c6f6605e5cc902c4636d885faba7a9f3d5e5edc6b35e4f3b1fd4c2d58d0304fa5 + languageName: node + linkType: hard + "@babel/plugin-syntax-unicode-sets-regex@npm:^7.18.6": version: 7.18.6 resolution: "@babel/plugin-syntax-unicode-sets-regex@npm:7.18.6" @@ -779,7 +862,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, 
@babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": +"@babel/plugin-transform-arrow-functions@npm:7.27.1, @babel/plugin-transform-arrow-functions@npm:^7.0.0, @babel/plugin-transform-arrow-functions@npm:^7.0.0-0, @babel/plugin-transform-arrow-functions@npm:^7.24.7, @babel/plugin-transform-arrow-functions@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-arrow-functions@npm:7.27.1" dependencies: @@ -838,7 +921,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": +"@babel/plugin-transform-class-properties@npm:7.27.1, @babel/plugin-transform-class-properties@npm:^7.0.0-0, @babel/plugin-transform-class-properties@npm:^7.25.4, @babel/plugin-transform-class-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-class-properties@npm:7.27.1" dependencies: @@ -862,7 +945,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": +"@babel/plugin-transform-classes@npm:7.28.4, @babel/plugin-transform-classes@npm:^7.0.0, @babel/plugin-transform-classes@npm:^7.0.0-0, @babel/plugin-transform-classes@npm:^7.25.4, @babel/plugin-transform-classes@npm:^7.28.4": version: 7.28.4 resolution: "@babel/plugin-transform-classes@npm:7.28.4" dependencies: @@ -1136,7 +1219,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": +"@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.0.0-0, 
@babel/plugin-transform-nullish-coalescing-operator@npm:^7.24.7, @babel/plugin-transform-nullish-coalescing-operator@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-nullish-coalescing-operator@npm:7.27.1" dependencies: @@ -1196,6 +1279,18 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-optional-chaining@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/plugin-transform-optional-chaining@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/34b0f96400c259a2722740d17a001fe45f78d8ff052c40e29db2e79173be72c1cfe8d9681067e3f5da3989e4a557402df5c982c024c18257587a41e022f95640 + languageName: node + linkType: hard + "@babel/plugin-transform-optional-chaining@npm:^7.0.0-0, @babel/plugin-transform-optional-chaining@npm:^7.24.8, @babel/plugin-transform-optional-chaining@npm:^7.27.1, @babel/plugin-transform-optional-chaining@npm:^7.28.5": version: 7.28.5 resolution: "@babel/plugin-transform-optional-chaining@npm:7.28.5" @@ -1376,7 +1471,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": +"@babel/plugin-transform-shorthand-properties@npm:7.27.1, @babel/plugin-transform-shorthand-properties@npm:^7.0.0, @babel/plugin-transform-shorthand-properties@npm:^7.0.0-0, @babel/plugin-transform-shorthand-properties@npm:^7.24.7, @babel/plugin-transform-shorthand-properties@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-shorthand-properties@npm:7.27.1" dependencies: @@ -1421,7 +1516,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": 
+"@babel/plugin-transform-template-literals@npm:7.27.1, @babel/plugin-transform-template-literals@npm:^7.0.0-0, @babel/plugin-transform-template-literals@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-template-literals@npm:7.27.1" dependencies: @@ -1458,6 +1553,21 @@ __metadata: languageName: node linkType: hard +"@babel/plugin-transform-typescript@npm:^7.27.1": + version: 7.28.6 + resolution: "@babel/plugin-transform-typescript@npm:7.28.6" + dependencies: + "@babel/helper-annotate-as-pure": "npm:^7.27.3" + "@babel/helper-create-class-features-plugin": "npm:^7.28.6" + "@babel/helper-plugin-utils": "npm:^7.28.6" + "@babel/helper-skip-transparent-expression-wrappers": "npm:^7.27.1" + "@babel/plugin-syntax-typescript": "npm:^7.28.6" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/a0bccc531fa8710a45b0b593140273741e0e4a0721b1ef6ef9dfefae0bbe61528440d65aab7936929551fd76793272257d74f60cf66891352f793294930a4b67 + languageName: node + linkType: hard + "@babel/plugin-transform-unicode-escapes@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-escapes@npm:7.27.1" @@ -1481,7 +1591,7 @@ __metadata: languageName: node linkType: hard -"@babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": +"@babel/plugin-transform-unicode-regex@npm:7.27.1, @babel/plugin-transform-unicode-regex@npm:^7.0.0, @babel/plugin-transform-unicode-regex@npm:^7.0.0-0, @babel/plugin-transform-unicode-regex@npm:^7.24.7, @babel/plugin-transform-unicode-regex@npm:^7.27.1": version: 7.27.1 resolution: "@babel/plugin-transform-unicode-regex@npm:7.27.1" dependencies: @@ -1614,6 +1724,21 @@ __metadata: languageName: node linkType: hard +"@babel/preset-typescript@npm:7.27.1": + version: 7.27.1 + resolution: "@babel/preset-typescript@npm:7.27.1" + dependencies: + "@babel/helper-plugin-utils": "npm:^7.27.1" + 
"@babel/helper-validator-option": "npm:^7.27.1" + "@babel/plugin-syntax-jsx": "npm:^7.27.1" + "@babel/plugin-transform-modules-commonjs": "npm:^7.27.1" + "@babel/plugin-transform-typescript": "npm:^7.27.1" + peerDependencies: + "@babel/core": ^7.0.0-0 + checksum: 10/9d8e75326b3c93fa016ba7aada652800fc77bc05fcc181888700a049935e8cf1284b549de18a5d62ef3591d02f097ea6de1111f7d71a991aaf36ba74657bd145 + languageName: node + linkType: hard + "@babel/preset-typescript@npm:^7.16.7, @babel/preset-typescript@npm:^7.23.0, @babel/preset-typescript@npm:^7.24.7": version: 7.28.5 resolution: "@babel/preset-typescript@npm:7.28.5" @@ -1647,6 +1772,17 @@ __metadata: languageName: node linkType: hard +"@babel/template@npm:^7.28.6": + version: 7.28.6 + resolution: "@babel/template@npm:7.28.6" + dependencies: + "@babel/code-frame": "npm:^7.28.6" + "@babel/parser": "npm:^7.28.6" + "@babel/types": "npm:^7.28.6" + checksum: 10/0ad6e32bf1e7e31bf6b52c20d15391f541ddd645cbd488a77fe537a15b280ee91acd3a777062c52e03eedbc2e1f41548791f6a3697c02476ec5daf49faa38533 + languageName: node + linkType: hard + "@babel/traverse--for-generate-function-map@npm:@babel/traverse@^7.25.3, @babel/traverse@npm:^7.25.3, @babel/traverse@npm:^7.27.1, @babel/traverse@npm:^7.28.0, @babel/traverse@npm:^7.28.3, @babel/traverse@npm:^7.28.4, @babel/traverse@npm:^7.28.5": version: 7.28.5 resolution: "@babel/traverse@npm:7.28.5" @@ -1662,6 +1798,21 @@ __metadata: languageName: node linkType: hard +"@babel/traverse@npm:^7.28.6": + version: 7.29.0 + resolution: "@babel/traverse@npm:7.29.0" + dependencies: + "@babel/code-frame": "npm:^7.29.0" + "@babel/generator": "npm:^7.29.0" + "@babel/helper-globals": "npm:^7.28.0" + "@babel/parser": "npm:^7.29.0" + "@babel/template": "npm:^7.28.6" + "@babel/types": "npm:^7.29.0" + debug: "npm:^4.3.1" + checksum: 10/3a0d0438f1ba9fed4fbe1706ea598a865f9af655a16ca9517ab57bda526e224569ca1b980b473fb68feea5e08deafbbf2cf9febb941f92f2d2533310c3fc4abc + languageName: node + linkType: hard + 
"@babel/types@npm:^7.0.0, @babel/types@npm:^7.20.7, @babel/types@npm:^7.21.3, @babel/types@npm:^7.24.7, @babel/types@npm:^7.25.2, @babel/types@npm:^7.26.0, @babel/types@npm:^7.27.1, @babel/types@npm:^7.27.3, @babel/types@npm:^7.28.2, @babel/types@npm:^7.28.4, @babel/types@npm:^7.28.5, @babel/types@npm:^7.3.3, @babel/types@npm:^7.4.4": version: 7.28.5 resolution: "@babel/types@npm:7.28.5" @@ -1672,6 +1823,16 @@ __metadata: languageName: node linkType: hard +"@babel/types@npm:^7.28.6, @babel/types@npm:^7.29.0": + version: 7.29.0 + resolution: "@babel/types@npm:7.29.0" + dependencies: + "@babel/helper-string-parser": "npm:^7.27.1" + "@babel/helper-validator-identifier": "npm:^7.28.5" + checksum: 10/bfc2b211210f3894dcd7e6a33b2d1c32c93495dc1e36b547376aa33441abe551ab4bc1640d4154ee2acd8e46d3bbc925c7224caae02fcaf0e6a771e97fccc661 + languageName: node + linkType: hard + "@bcoe/v8-coverage@npm:^0.2.3": version: 0.2.3 resolution: "@bcoe/v8-coverage@npm:0.2.3" @@ -4799,6 +4960,15 @@ __metadata: languageName: node linkType: hard +"@types/react@npm:~19.2.0": + version: 19.2.14 + resolution: "@types/react@npm:19.2.14" + dependencies: + csstype: "npm:^3.2.2" + checksum: 10/fbff239089ee64b6bd9b00543594db498278b06de527ef1b0f71bb0eb09cc4445a71b5dd3c0d3d0257255c4eed94406be40a74ad4a987ade8a8d5dd65c82bc5f + languageName: node + linkType: hard + "@types/semver@npm:^7.3.12": version: 7.7.1 resolution: "@types/semver@npm:7.7.1" @@ -5147,6 +5317,18 @@ __metadata: languageName: node linkType: hard +"ajv@npm:^8.11.0": + version: 8.18.0 + resolution: "ajv@npm:8.18.0" + dependencies: + fast-deep-equal: "npm:^3.1.3" + fast-uri: "npm:^3.0.1" + json-schema-traverse: "npm:^1.0.0" + require-from-string: "npm:^2.0.2" + checksum: 10/bfed9de827a2b27c6d4084324eda76a4e32bdde27410b3e9b81d06e6f8f5c78370fc6b93fe1d869f1939ff1d7c4ae8896960995acb8425e3e9288c8884247c48 + languageName: node + linkType: hard + "anser@npm:^1.4.9": version: 1.4.10 resolution: "anser@npm:1.4.10" @@ -6387,8 +6569,9 @@ __metadata: 
"@react-navigation/native": "npm:^7.1.6" "@shopify/react-native-skia": "npm:2.2.12" "@types/pngjs": "npm:^6.0.5" - "@types/react": "npm:~19.1.10" + "@types/react": "npm:~19.2.0" expo: "npm:^54.0.27" + expo-build-properties: "npm:~1.0.10" expo-constants: "npm:~18.0.11" expo-font: "npm:~14.0.10" expo-linking: "npm:~8.0.10" @@ -6402,12 +6585,14 @@ __metadata: react-native-gesture-handler: "npm:~2.28.0" react-native-image-picker: "npm:^7.2.2" react-native-loading-spinner-overlay: "npm:^3.0.1" - react-native-reanimated: "npm:~4.1.1" + react-native-nitro-image: "npm:0.10.2" + react-native-nitro-modules: "npm:0.33.4" + react-native-reanimated: "npm:~4.2.1" react-native-safe-area-context: "npm:~5.6.0" react-native-screens: "npm:~4.16.0" react-native-svg: "npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" - react-native-worklets: "npm:0.5.1" + react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -6437,7 +6622,7 @@ __metadata: languageName: node linkType: hard -"convert-source-map@npm:^2.0.0": +"convert-source-map@npm:2.0.0, convert-source-map@npm:^2.0.0": version: 2.0.0 resolution: "convert-source-map@npm:2.0.0" checksum: 10/c987be3ec061348cdb3c2bfb924bec86dea1eacad10550a85ca23edb0fe3556c3a61c7399114f3331ccb3499d7fd0285ab24566e5745929412983494c3926e15 @@ -6757,7 +6942,7 @@ __metadata: languageName: node linkType: hard -"csstype@npm:^3.0.2": +"csstype@npm:^3.0.2, csstype@npm:^3.2.2": version: 3.2.3 resolution: "csstype@npm:3.2.3" checksum: 10/ad41baf7e2ffac65ab544d79107bf7cd1a4bb9bab9ac3302f59ab4ba655d5e30942a8ae46e10ba160c6f4ecea464cc95b975ca2fefbdeeacd6ac63f12f99fe1f @@ -7849,6 +8034,18 @@ __metadata: languageName: node linkType: hard +"expo-build-properties@npm:~1.0.10": + version: 1.0.10 + resolution: "expo-build-properties@npm:1.0.10" + dependencies: + ajv: "npm:^8.11.0" + semver: "npm:^7.6.0" + peerDependencies: + expo: "*" + checksum: 
10/0dde41d659d243268ceae49bba3e4c07b72c245df8124f86fb720bc0556a2c4d03dd75e59e068a07438ef5ba3188b67a7a6516d2a37d3d91429070745b2506a2 + languageName: node + linkType: hard + "expo-calendar@npm:~15.0.8": version: 15.0.8 resolution: "expo-calendar@npm:15.0.8" @@ -8236,6 +8433,13 @@ __metadata: languageName: node linkType: hard +"fast-uri@npm:^3.0.1": + version: 3.1.0 + resolution: "fast-uri@npm:3.1.0" + checksum: 10/818b2c96dc913bcf8511d844c3d2420e2c70b325c0653633f51821e4e29013c2015387944435cd0ef5322c36c9beecc31e44f71b257aeb8e0b333c1d62bb17c2 + languageName: node + linkType: hard + "fast-xml-parser@npm:^4.4.1": version: 4.5.3 resolution: "fast-xml-parser@npm:4.5.3" @@ -10192,6 +10396,13 @@ __metadata: languageName: node linkType: hard +"json-schema-traverse@npm:^1.0.0": + version: 1.0.0 + resolution: "json-schema-traverse@npm:1.0.0" + checksum: 10/02f2f466cdb0362558b2f1fd5e15cce82ef55d60cd7f8fa828cf35ba74330f8d767fcae5c5c2adb7851fa811766c694b9405810879bc4e1ddd78a7c0e03658ad + languageName: node + linkType: hard + "json-stable-stringify-without-jsonify@npm:^1.0.1": version: 1.0.1 resolution: "json-stable-stringify-without-jsonify@npm:1.0.1" @@ -13138,7 +13349,7 @@ __metadata: languageName: node linkType: hard -"react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": +"react-native-is-edge-to-edge@npm:1.2.1, react-native-is-edge-to-edge@npm:^1.1.6, react-native-is-edge-to-edge@npm:^1.2.1": version: 1.2.1 resolution: "react-native-is-edge-to-edge@npm:1.2.1" peerDependencies: @@ -13183,6 +13394,27 @@ __metadata: languageName: node linkType: hard +"react-native-nitro-image@npm:0.10.2": + version: 0.10.2 + resolution: "react-native-nitro-image@npm:0.10.2" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-modules: "*" + checksum: 10/3be75e93da369adfe00441dae78171572dec38d3d7e75e5d4cb302b81479be9686c8d8dc0ea4b331514b8725099bf3eb069ab9933f7029627d12a72d71766cb4 + languageName: node + linkType: hard + 
+"react-native-nitro-modules@npm:0.33.4": + version: 0.33.4 + resolution: "react-native-nitro-modules@npm:0.33.4" + peerDependencies: + react: "*" + react-native: "*" + checksum: 10/a737ff6b142c55821688612305245fd10a7cff36f0ee66cad0956c6815a60cdd4ba64cdfba6137a6dbfe815645763ce5d406cf488876edd47dab7f8d0031e01a + languageName: node + linkType: hard + "react-native-reanimated@npm:~4.1.1": version: 4.1.6 resolution: "react-native-reanimated@npm:4.1.6" @@ -13198,6 +13430,20 @@ __metadata: languageName: node linkType: hard +"react-native-reanimated@npm:~4.2.1": + version: 4.2.2 + resolution: "react-native-reanimated@npm:4.2.2" + dependencies: + react-native-is-edge-to-edge: "npm:1.2.1" + semver: "npm:7.7.3" + peerDependencies: + react: "*" + react-native: "*" + react-native-worklets: ">=0.7.0" + checksum: 10/2ad24cc827aaabb54c18d75a4ab98b92a25dd57c05bfabb886341c0e62d8efc5d5973f415cb1da2ecab9ebe077bec1179b91c681de90e124dbf1160a418ee29d + languageName: node + linkType: hard + "react-native-safe-area-context@npm:~5.6.0": version: 5.6.2 resolution: "react-native-safe-area-context@npm:5.6.2" @@ -13274,6 +13520,29 @@ __metadata: languageName: node linkType: hard +"react-native-worklets@npm:^0.7.2": + version: 0.7.4 + resolution: "react-native-worklets@npm:0.7.4" + dependencies: + "@babel/plugin-transform-arrow-functions": "npm:7.27.1" + "@babel/plugin-transform-class-properties": "npm:7.27.1" + "@babel/plugin-transform-classes": "npm:7.28.4" + "@babel/plugin-transform-nullish-coalescing-operator": "npm:7.27.1" + "@babel/plugin-transform-optional-chaining": "npm:7.27.1" + "@babel/plugin-transform-shorthand-properties": "npm:7.27.1" + "@babel/plugin-transform-template-literals": "npm:7.27.1" + "@babel/plugin-transform-unicode-regex": "npm:7.27.1" + "@babel/preset-typescript": "npm:7.27.1" + convert-source-map: "npm:2.0.0" + semver: "npm:7.7.3" + peerDependencies: + "@babel/core": "*" + react: "*" + react-native: "*" + checksum: 
10/922b209940e298d21313d22f8a6eb87ad603442850c7ff8bc9cfef694cb211d7ec9903e24ee20b6bcf6164f8e7c165b65307dcca3d67465fdffda1c45fe05d1d + languageName: node + linkType: hard + "react-native@npm:0.81.5": version: 0.81.5 resolution: "react-native@npm:0.81.5" @@ -13790,21 +14059,21 @@ __metadata: languageName: node linkType: hard -"semver@npm:^6.3.0, semver@npm:^6.3.1": - version: 6.3.1 - resolution: "semver@npm:6.3.1" +"semver@npm:7.7.3, semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": + version: 7.7.3 + resolution: "semver@npm:7.7.3" bin: semver: bin/semver.js - checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e + checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 languageName: node linkType: hard -"semver@npm:^7.1.3, semver@npm:^7.3.5, semver@npm:^7.3.7, semver@npm:^7.5.2, semver@npm:^7.5.3, semver@npm:^7.5.4, semver@npm:^7.6.0, semver@npm:^7.7.1": - version: 7.7.3 - resolution: "semver@npm:7.7.3" +"semver@npm:^6.3.0, semver@npm:^6.3.1": + version: 6.3.1 + resolution: "semver@npm:6.3.1" bin: semver: bin/semver.js - checksum: 10/8dbc3168e057a38fc322af909c7f5617483c50caddba135439ff09a754b20bdd6482a5123ff543dad4affa488ecf46ec5fb56d61312ad20bb140199b88dfaea9 + checksum: 10/1ef3a85bd02a760c6ef76a45b8c1ce18226de40831e02a00bad78485390b98b6ccaa31046245fc63bba4a47a6a592b6c7eedc65cc47126e60489f9cc1ce3ed7e languageName: node linkType: hard From 62df7ceb464e9bcb50dfcea537f967a3d102aabf Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Fri, 20 Feb 2026 09:27:37 +0100 Subject: [PATCH 12/37] fix: remove redundant preprocessing step --- .../common/rnexecutorch/models/VisionModel.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 8155b8819..a81518921 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,12 +11,8 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { - // Extract frame using FrameProcessor utility auto frameObj = frameData.asObject(runtime); - cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj); - - // Apply model-specific preprocessing - return preprocessFrame(frame); + return ::rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { From 962f1c385cbcfb56497b14a5b6cfc10c93b561a8 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 11:46:18 +0100 Subject: [PATCH 13/37] refactor: changes suggested in review --- .cspell-wordlist.txt | 4 +- .../rnexecutorch/RnExecutorchInstaller.h | 3 - .../rnexecutorch/models/VisionModel.cpp | 18 +-- .../common/rnexecutorch/models/VisionModel.h | 24 +--- .../object_detection/ObjectDetection.cpp | 41 +------ .../models/object_detection/ObjectDetection.h | 1 - .../rnexecutorch/utils/FrameExtractor.cpp | 50 ++++---- .../rnexecutorch/utils/FrameExtractor.h | 61 +++------- .../rnexecutorch/utils/FrameProcessor.cpp | 102 ++--------------- .../rnexecutorch/utils/FrameProcessor.h | 108 +++--------------- .../src/hooks/useModule.ts | 1 - .../modules/computer_vision/VisionModule.ts | 27 +++-- 12 files changed, 91 insertions(+), 349 deletions(-) diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index a2e8ecbab..cb92b44bd 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -116,4 +116,6 @@ antonov rfdetr basemodule IMAGENET -worklet \ No newline at 
end of file +worklet +worklets +BGRA \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h index 80b7d18b3..54e8c1cbb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h +++ b/packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h @@ -54,9 +54,6 @@ class RnExecutorchInstaller { meta::createConstructorArgsWithCallInvoker( args, runtime, jsCallInvoker); - // This unpacks the tuple and calls the constructor directly inside - // make_shared. It avoids creating a temporary object, so no - // move/copy is required. auto modelImplementationPtr = std::apply( [](auto &&...unpackedArgs) { return std::make_shared( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index a81518921..b88310e12 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -4,19 +4,17 @@ #include #include -namespace rnexecutorch { -namespace models { +namespace rnexecutorch::models { using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - return ::rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); + return ::rnexecutorch::utils::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { - // Validate dimensions: sizes must be [height, width, channels] if (tensorView.sizes.size() != 3) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), @@ -27,11 +25,10 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { errorMessage); } - int height 
= tensorView.sizes[0]; - int width = tensorView.sizes[1]; - int channels = tensorView.sizes[2]; + int32_t height = tensorView.sizes[0]; + int32_t width = tensorView.sizes[1]; + int32_t channels = tensorView.sizes[2]; - // Pixel data must be RGB (3 channels) and BYTE type if (channels != 3) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), @@ -47,13 +44,10 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); } - // Create cv::Mat directly from dataPtr (zero-copy view) - // Data is valid for the duration of this synchronous call uint8_t *dataPtr = static_cast(tensorView.dataPtr); cv::Mat image(height, width, CV_8UC3, dataPtr); return image; } -} // namespace models -} // namespace rnexecutorch +} // namespace rnexecutorch::models diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index c362d745f..82d544db3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -18,17 +18,13 @@ namespace models { * process camera frames in real-time (e.g., at 30fps). * * Thread Safety: - * - All inference operations are protected by a mutex - * - generateFromFrame() uses try_lock() to skip frames when the model is busy - * - This prevents blocking the camera thread and maintains smooth frame rates + * - All inference operations are protected by a mutex via scoped_lock * * Usage: * Subclasses should: * 1. Inherit from VisionModel instead of BaseModel * 2. Implement preprocessFrame() with model-specific preprocessing - * 3. Use inference_mutex_ when calling forward() in custom generate methods - * 4. Use lock_guard for blocking operations (JS API) - * 5. Use try_lock() for non-blocking operations (camera API) + * 3. 
Delegate to runInference() which handles locking internally * * Example: * @code @@ -36,18 +32,9 @@ namespace models { * public: * std::unordered_map * generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) { - * // try_lock is handled automatically * auto frameObject = frameValue.asObject(runtime); - * cv::Mat frame = FrameExtractor::extractFrame(runtime, frameObject); - * - * // Lock before inference - * if (!inference_mutex_.try_lock()) { - * return {}; // Skip frame if busy - * } - * std::lock_guard lock(inference_mutex_, std::adopt_lock); - * - * auto preprocessed = preprocessFrame(frame); - * // ... run inference + * cv::Mat frame = utils::extractFrame(runtime, frameObject); + * return runInference(frame); * } * }; * @endcode @@ -64,9 +51,6 @@ class VisionModel : public BaseModel { std::shared_ptr callInvoker) : BaseModel(modelSource, callInvoker) {} - /** - * @brief Virtual destructor for proper cleanup in derived classes - */ virtual ~VisionModel() = default; protected: diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index f17a4f074..1ae2460c3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -33,26 +33,19 @@ ObjectDetection::ObjectDetection( } cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const { - // Get target size from model input shape const std::vector tensorDims = getAllInputShapes()[0]; cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1], tensorDims[tensorDims.size() - 2]); cv::Mat rgb; - // Convert RGBA/BGRA to RGB if needed (for VisionCamera frames) if (frame.channels() == 4) { -// Platform-specific color conversion: -// iOS uses BGRA format, Android uses RGBA format #ifdef 
__APPLE__ - // iOS: BGRA β†’ RGB cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); #else - // Android: RGBA β†’ RGB cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); #endif } else if (frame.channels() == 3) { - // Already RGB rgb = frame; } else { char errorMessage[100]; @@ -113,15 +106,11 @@ ObjectDetection::postprocess(const std::vector &tensors, std::vector ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { - std::lock_guard lock(inference_mutex_); + std::scoped_lock lock(inference_mutex_); - // Store original size for postprocessing cv::Size originalSize = image.size(); - - // Preprocess the image using model-specific preprocessing cv::Mat preprocessed = preprocessFrame(image); - // Create tensor and run inference const std::vector tensorDims = getAllInputShapes()[0]; auto inputTensor = image_processing::getTensorFromMatrix(tensorDims, preprocessed); @@ -139,14 +128,11 @@ ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { std::vector ObjectDetection::generateFromString(std::string imageSource, double detectionThreshold) { - // Read image using OpenCV (BGR format) - cv::Mat image = image_processing::readImage(imageSource); + cv::Mat imageBGR = image_processing::readImage(imageSource); - // Convert BGR to RGB (OpenCV imread returns BGR) cv::Mat imageRGB; - cv::cvtColor(image, imageRGB, cv::COLOR_BGR2RGB); + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(imageRGB, detectionThreshold); } @@ -154,22 +140,9 @@ std::vector ObjectDetection::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold) { - // Try-lock: skip frame if model is busy (non-blocking for camera) - if (!inference_mutex_.try_lock()) { - return {}; // Return empty vector, don't block camera thread - } + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = rnexecutorch::utils::extractFrame(runtime, frameObj); - 
// Extract frame (under lock to ensure thread safety) - cv::Mat frame; - { - std::lock_guard lock(inference_mutex_, std::adopt_lock); - auto frameObj = frameData.asObject(runtime); - frame = - rnexecutorch::utils::FrameProcessor::extractFrame(runtime, frameObj); - } - // Lock is automatically released here when going out of scope - - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(frame, detectionThreshold); } @@ -177,14 +150,10 @@ std::vector ObjectDetection::generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, double detectionThreshold) { - // Convert JSI value to JSTensorViewIn auto tensorView = jsi_conversion::getValue(pixelData, runtime); - - // Extract raw pixel data to cv::Mat cv::Mat image = extractFromPixels(tensorView); - // Use the internal helper - it handles locking, preprocessing, and inference return runInference(image, detectionThreshold); } } // namespace rnexecutorch::models::object_detection \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index fc554003b..bf231ff0b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -30,7 +30,6 @@ class ObjectDetection : public VisionModel { double detectionThreshold); protected: - // Internal helper for shared preprocessing and inference logic std::vector runInference(cv::Mat image, double detectionThreshold); cv::Mat preprocessFrame(const cv::Mat &frame) const override; diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 900eae297..3b31bc10f 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -7,52 +7,35 @@ #import #endif -#ifdef __ANDROID__ -#if __ANDROID_API__ >= 26 +#if defined(__ANDROID__) && __ANDROID_API__ >= 26 #include #endif -#endif -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { -cv::Mat FrameExtractor::extractFromNativeBuffer(uint64_t bufferPtr) { -#ifdef __APPLE__ - return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); -#elif defined(__ANDROID__) - return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); -#else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, - "NativeBuffer not supported on this platform"); -#endif -} +namespace { #ifdef __APPLE__ -cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { +cv::Mat extractFromCVPixelBuffer(void *pixelBuffer) { CVPixelBufferRef buffer = static_cast(pixelBuffer); - // Get buffer properties size_t width = CVPixelBufferGetWidth(buffer); size_t height = CVPixelBufferGetHeight(buffer); size_t bytesPerRow = CVPixelBufferGetBytesPerRow(buffer); OSType pixelFormat = CVPixelBufferGetPixelFormatType(buffer); - // Lock the buffer (Vision Camera should have already locked it, but ensure) CVPixelBufferLockBaseAddress(buffer, kCVPixelBufferLock_ReadOnly); void *baseAddress = CVPixelBufferGetBaseAddress(buffer); cv::Mat mat; if (pixelFormat == kCVPixelFormatType_32BGRA) { - // BGRA format (most common on iOS when using pixelFormat: 'rgb') mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_32RGBA) { - // RGBA format mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC4, baseAddress, bytesPerRow); } else if (pixelFormat == kCVPixelFormatType_24RGB) { - // RGB format mat = cv::Mat(static_cast(height), static_cast(width), CV_8UC3, baseAddress, bytesPerRow); } else { @@ -72,15 +55,13 @@ 
cv::Mat FrameExtractor::extractFromCVPixelBuffer(void *pixelBuffer) { #endif #ifdef __ANDROID__ -cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { +cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { #if __ANDROID_API__ >= 26 AHardwareBuffer *buffer = static_cast(hardwareBuffer); - // Get buffer description AHardwareBuffer_Desc desc; AHardwareBuffer_describe(buffer, &desc); - // Lock the buffer for CPU read access void *data = nullptr; int lockResult = AHardwareBuffer_lock( buffer, AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN, -1, nullptr, &data); @@ -93,13 +74,10 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { cv::Mat mat; if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM) { - // RGBA format (expected when using pixelFormat: 'rgb' on Android) mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM) { - // RGBX format (treated as RGBA) mat = cv::Mat(desc.height, desc.width, CV_8UC4, data, desc.stride * 4); } else if (desc.format == AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM) { - // RGB format (less common) mat = cv::Mat(desc.height, desc.width, CV_8UC3, data, desc.stride * 3); } else { AHardwareBuffer_unlock(buffer, nullptr); @@ -118,7 +96,19 @@ cv::Mat FrameExtractor::extractFromAHardwareBuffer(void *hardwareBuffer) { "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } -#endif // __ANDROID__ +#endif + +} // namespace + +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { +#ifdef __APPLE__ + return extractFromCVPixelBuffer(reinterpret_cast(bufferPtr)); +#elif defined(__ANDROID__) + return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); +#else + throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + "NativeBuffer not supported on this platform"); +#endif +} -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h index a90e6ad23..f5d7c2094 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -3,58 +3,23 @@ #include #include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { /** - * @brief Utility class for extracting cv::Mat from native platform buffers + * @brief Extract cv::Mat from a native platform buffer pointer (zero-copy) * - * Provides zero-copy extraction of frames from: + * Dispatches to the platform-specific implementation: * - iOS: CVPixelBufferRef * - Android: AHardwareBuffer + * + * @param bufferPtr Platform-specific buffer pointer (uint64_t) + * @return cv::Mat wrapping the buffer data (zero-copy) + * + * @throws RnExecutorchError if the platform is unsupported or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the buffer remains valid during use. */ -class FrameExtractor { -public: - /** - * @brief Extract cv::Mat from a native buffer pointer - * - * @param bufferPtr Platform-specific buffer pointer (uint64_t) - * - iOS: CVPixelBufferRef - * - Android: AHardwareBuffer* - * @return cv::Mat wrapping the buffer data (zero-copy) - * - * @note The returned cv::Mat does not own the data. - * The caller must ensure the buffer remains valid. - * @note The buffer must be locked before calling and unlocked after use. 
- */ - static cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); - -#ifdef __APPLE__ - /** - * @brief Extract cv::Mat from CVPixelBuffer (iOS) - * - * @param pixelBuffer CVPixelBufferRef pointer - * @return cv::Mat wrapping the pixel buffer data - * - * @note Assumes buffer is already locked by Vision Camera - * @note Supports kCVPixelFormatType_32BGRA and kCVPixelFormatType_24RGB - */ - static cv::Mat extractFromCVPixelBuffer(void *pixelBuffer); -#endif - -#ifdef __ANDROID__ - /** - * @brief Extract cv::Mat from AHardwareBuffer (Android) - * - * @param hardwareBuffer AHardwareBuffer* pointer - * @return cv::Mat wrapping the hardware buffer data - * - * @note Assumes buffer is already locked by Vision Camera - * @note Supports AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM and R8G8B8_UNORM - */ - static cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer); -#endif -}; +cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 5e593dfd0..30238ad5c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -2,105 +2,27 @@ #include "FrameExtractor.h" #include #include -#include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { -cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime, - const jsi::Object &frameData) { - // Try zero-copy path first (nativeBuffer) - // Native buffer contains dimensions, so we don't need width/height properties - if (hasNativeBuffer(runtime, frameData)) { - try { - return extractFromNativeBuffer(runtime, frameData); - } catch (const std::exception &e) { - // Fallback to ArrayBuffer on failure - } - } - - // Fallback to ArrayBuffer path 
(with copy) - // Get frame dimensions for ArrayBuffer path - if (frameData.hasProperty(runtime, "data")) { - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - return extractFromArrayBuffer(runtime, frameData, width, height); - } +namespace { - // No valid frame data source - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidUserInput, - "FrameProcessor: No valid frame data (neither nativeBuffer nor data " - "property found)"); +bool hasNativeBuffer(jsi::Runtime &runtime, const jsi::Object &frameData) { + return frameData.hasProperty(runtime, "nativeBuffer"); } -cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime, - const jsi::Object &frameData) { - if (!frameData.hasProperty(runtime, "width") || - !frameData.hasProperty(runtime, "height")) { - throw RnExecutorchError( - RnExecutorchErrorCode::InvalidUserInput, - "FrameProcessor: Frame data missing width or height property"); - } - - int width = - static_cast(frameData.getProperty(runtime, "width").asNumber()); - int height = - static_cast(frameData.getProperty(runtime, "height").asNumber()); - - return cv::Size(width, height); -} +} // namespace -bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData) { - return frameData.hasProperty(runtime, "nativeBuffer"); -} +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { + if (!hasNativeBuffer(runtime, frameData)) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "FrameProcessor: No nativeBuffer found in frame"); + } -cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData) { auto nativeBufferValue = frameData.getProperty(runtime, "nativeBuffer"); - - // Handle bigint pointer value from JavaScript uint64_t bufferPtr = static_cast( nativeBufferValue.asBigInt(runtime).asUint64(runtime)); - // Use FrameExtractor to get 
cv::Mat from platform-specific buffer - // Native buffer contains all metadata (width, height, format) - return FrameExtractor::extractFromNativeBuffer(bufferPtr); -} - -cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, - int width, int height) { - auto pixelData = frameData.getProperty(runtime, "data"); - auto arrayBuffer = pixelData.asObject(runtime).getArrayBuffer(runtime); - uint8_t *data = arrayBuffer.data(runtime); - size_t bufferSize = arrayBuffer.size(runtime); - - // Determine format based on buffer size - size_t stride = bufferSize / height; - size_t expectedRGBAStride = width * 4; - size_t expectedRGBStride = width * 3; - - if (stride == expectedRGBAStride || bufferSize >= width * height * 4) { - // RGBA format with potential padding - return cv::Mat(height, width, CV_8UC4, data, stride); - } else if (stride >= expectedRGBStride) { - // RGB format - return cv::Mat(height, width, CV_8UC3, data, stride); - } else { - char errorMessage[200]; - std::snprintf( - errorMessage, sizeof(errorMessage), - "FrameProcessor: Unexpected buffer size - expected %zu or %zu bytes " - "per row, got %zu", - expectedRGBStride, expectedRGBAStride, stride); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - errorMessage); - } + return extractFromNativeBuffer(bufferPtr); } - -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index 0838b6594..403f4bde9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -3,107 +3,25 @@ #include #include -namespace rnexecutorch { -namespace utils { +namespace rnexecutorch::utils { using namespace facebook; /** - * @brief Utility class for processing camera 
frames from VisionCamera + * @brief Extract cv::Mat from VisionCamera frame data via nativeBuffer * - * Provides high-level helpers for extracting and working with frames from - * react-native-vision-camera in a consistent way across all vision models. + * @param runtime JSI runtime + * @param frameData JSI object containing frame data from VisionCamera + * Expected properties: + * - nativeBuffer: BigInt pointer to native buffer * - * This class abstracts away the complexity of: - * - Handling both nativeBuffer (zero-copy) and ArrayBuffer (with copy) paths - * - Platform-specific buffer formats (CVPixelBuffer on iOS, AHardwareBuffer - * on Android) - * - JSI object property access and type conversions + * @return cv::Mat wrapping the frame data (zero-copy) * - * Usage: - * @code - * auto frameObj = frameData.asObject(runtime); - * cv::Mat frame = FrameProcessor::extractFrame(runtime, frameObj); - * cv::Size size = FrameProcessor::getFrameSize(runtime, frameObj); - * @endcode + * @throws RnExecutorchError if nativeBuffer is not present or extraction fails + * + * @note The returned cv::Mat does not own the data. + * Caller must ensure the source frame remains valid during use. */ -class FrameProcessor { -public: - /** - * @brief Extract cv::Mat from VisionCamera frame data - * - * Handles both zero-copy (nativeBuffer) and copy-based (ArrayBuffer) paths - * automatically. Prefers nativeBuffer when available for best performance. 
- * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data from VisionCamera - * Expected properties: - * - nativeBuffer (optional): BigInt pointer to native buffer - * - data (optional): ArrayBuffer with pixel data - * - width: number - * - height: number - * - * @return cv::Mat wrapping or containing the frame data - * - * @throws RnExecutorchError if neither nativeBuffer nor data is available - * @throws RnExecutorchError if nativeBuffer extraction fails - * - * @note The returned cv::Mat may not own the data (zero-copy path). - * Caller must ensure the source frame remains valid during use. - */ - static cv::Mat extractFrame(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Get frame dimensions from VisionCamera frame data - * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data - * - * @return cv::Size with frame width and height - * - * @throws RnExecutorchError if width or height properties are missing - */ - static cv::Size getFrameSize(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Check if frame data has nativeBuffer (zero-copy path available) - * - * @param runtime JSI runtime - * @param frameData JSI object containing frame data - * @return true if nativeBuffer is available, false otherwise - */ - static bool hasNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData); - -private: - /** - * @brief Extract frame from nativeBuffer pointer (zero-copy) - * - * Native buffer contains all metadata (width, height, format), so no need to - * pass dimensions separately. 
- * - * @param runtime JSI runtime - * @param frameData JSI object with nativeBuffer property - * @return cv::Mat wrapping the native buffer data - */ - static cv::Mat extractFromNativeBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData); - - /** - * @brief Extract frame from ArrayBuffer (with copy) - * - * @param runtime JSI runtime - * @param frameData JSI object with data property - * @param width Frame width - * @param height Frame height - * @return cv::Mat containing or wrapping the array buffer data - */ - static cv::Mat extractFromArrayBuffer(jsi::Runtime &runtime, - const jsi::Object &frameData, int width, - int height); -}; +cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData); -} // namespace utils -} // namespace rnexecutorch +} // namespace rnexecutorch::utils diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index f5f260787..82a0bb72f 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -48,7 +48,6 @@ export const useModule = < }); if (isMounted) setIsReady(true); - // Extract runOnFrame worklet from VisionModule if available // Use "state trick" to make the worklet serializable for VisionCamera if ('runOnFrame' in moduleInstance) { const worklet = moduleInstance.runOnFrame; diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index d6a0038ee..eabe50ab0 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -15,6 +15,20 @@ import { Frame, PixelData, ScalarType } from '../../types/common'; * * @category Typescript API */ +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== 
null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + export abstract class VisionModule extends BaseModule { /** * Synchronous worklet function for real-time VisionCamera frame processing. @@ -111,19 +125,8 @@ export abstract class VisionModule extends BaseModule { // Type detection and routing if (typeof input === 'string') { - // String path β†’ generateFromString() return await this.nativeModule.generateFromString(input, ...args); - } else if ( - typeof input === 'object' && - 'dataPtr' in input && - input.dataPtr instanceof Uint8Array && - 'sizes' in input && - Array.isArray(input.sizes) && - input.sizes.length === 3 && - 'scalarType' in input && - input.scalarType === ScalarType.BYTE - ) { - // Pixel data β†’ generateFromPixels() + } else if (isPixelData(input)) { return await this.nativeModule.generateFromPixels(input, ...args); } else { throw new RnExecutorchError( From 7753bd130880b78530821c58959e100c5bb90ea3 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:00:39 +0100 Subject: [PATCH 14/37] fix: not existing error type, add comments to JSI code --- .../host_objects/ModelHostObject.h | 60 +++++++++-------- .../metaprogramming/FunctionHelpers.h | 66 ++++++++----------- .../rnexecutorch/utils/FrameExtractor.cpp | 7 +- 3 files changed, 60 insertions(+), 73 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index 3190bc6f4..abf920223 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -46,13 +46,6 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } - 
if constexpr (meta::HasGenerateFromString) { - addFunctions( - JSI_EXPORT_FUNCTION(ModelHostObject, - promiseHostFunction<&Model::generateFromString>, - "generateFromString")); - } - if constexpr (meta::HasEncode) { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::encode>, @@ -172,6 +165,13 @@ template class ModelHostObject : public JsiHostObject { "stream")); } + if constexpr (meta::HasGenerateFromString) { + addFunctions( + JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generateFromString>, + "generateFromString")); + } + if constexpr (meta::HasGenerateFromFrame) { addFunctions(JSI_EXPORT_FUNCTION( ModelHostObject, visionHostFunction<&Model::generateFromFrame>, @@ -181,7 +181,7 @@ template class ModelHostObject : public JsiHostObject { if constexpr (meta::HasGenerateFromPixels) { addFunctions( JSI_EXPORT_FUNCTION(ModelHostObject, - visionHostFunction<&Model::generateFromPixels>, + promiseHostFunction<&Model::generateFromPixels>, "generateFromPixels")); } } @@ -233,36 +233,40 @@ template class ModelHostObject : public JsiHostObject { } } + /** + * Unlike promiseHostFunction, this runs synchronously on the JS thread, + * which is required for VisionCamera worklet frame processors. + * + * The key challenge is argument mapping: the C++ function takes + * (Runtime, frameData, Rest...) but from the JS side, Runtime is injected + * automatically and frameData is JS args[0]. The remaining args (Rest...) + * map to JS args[1..N]. + * + * This is achieved via TailSignature: it extracts the Rest... parameter pack + * from the function pointer type, creates a dummy free function with only + * those types, then uses createArgsTupleFromJsi on that dummy to convert + * args[1..N] β€” bypassing the manually-handled frameData at args[0]. + * + * Argument mapping: + * C++ params: (Runtime&, frameData, Rest[0], Rest[1], ...) + * JS args: ( args[0], args[1], args[2], ...) 
+ * JS arg count = C++ arity - 1 (Runtime is injected, not counted) + * + */ template JSI_HOST_FUNCTION(visionHostFunction) { - // 1. Check Argument Count - // (We rely on our new FunctionTraits) constexpr std::size_t cppArgCount = meta::FunctionTraits::arity; - - // We expect JS args = (Total C++ Args) - (2 injected args: Runtime + Value) constexpr std::size_t expectedJsArgs = cppArgCount - 1; - log(LOG_LEVEL::Debug, cppArgCount, count); + if (count != expectedJsArgs) { throw jsi::JSError(runtime, "Argument count mismatch in vision function"); } try { - // 2. The Magic Trick - // We get a pointer to a dummy function: void dummy(Rest...) {} - // This function has exactly the signature of the arguments we want to - // parse. auto dummyFuncPtr = &meta::TailSignature::dummy; - - // 3. Let existing helpers do the work - // We pass the dummy pointer. The helper inspects its arguments (Rest...) - // and converts args[0]...args[N] accordingly. - // Note: We pass (args + 1) because JS args[0] is the PixelData, which we - // handle manually. Note: We use expectedJsArgs - 1 because we skipped one - // JS arg. auto tailArgsTuple = meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime); - // 4. Invoke using ReturnType = typename meta::FunctionTraits::return_type; @@ -270,10 +274,8 @@ template class ModelHostObject : public JsiHostObject { std::apply( [&](auto &&...tailArgs) { (model.get()->*FnPtr)( - runtime, - args[0], // 1. PixelData (Manually passed) - std::forward( - tailArgs)...); // 2. 
The rest (Auto parsed) + runtime, args[0], + std::forward(tailArgs)...); }, std::move(tailArgsTuple)); return jsi::Value::undefined(); diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index a48aa0119..88bf04bf0 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -10,32 +10,6 @@ namespace rnexecutorch::meta { using namespace facebook; -// ========================================================================= -// 1. Function Traits (Extracts Arity, Return Type, Args) -// ========================================================================= - -template struct FunctionTraits; - -// Specialization for Member Functions -template -struct FunctionTraits { - static constexpr std::size_t arity = sizeof...(Args); - using return_type = R; - using args_tuple = std::tuple; -}; - -// Specialization for const Member Functions -template -struct FunctionTraits { - static constexpr std::size_t arity = sizeof...(Args); - using return_type = R; - using args_tuple = std::tuple; -}; - -// ========================================================================= -// 2. Argument Counting Helpers -// ========================================================================= - template constexpr std::size_t getArgumentCount(R (Model::*f)(Types...)) { return sizeof...(Types); @@ -46,10 +20,6 @@ constexpr std::size_t getArgumentCount(R (Model::*f)(Types...) const) { return sizeof...(Types); } -// ========================================================================= -// 3. 
JSI -> Tuple Conversion Logic -// ========================================================================= - template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, @@ -62,6 +32,7 @@ std::tuple fillTupleFromArgs(std::index_sequence, * arguments for method supplied with a pointer. The types in the tuple are * inferred from the method pointer. */ + template std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...), const jsi::Value *args, @@ -78,7 +49,9 @@ std::tuple createArgsTupleFromJsi(R (Model::*f)(Types...) const, runtime); } -// Overload for free functions (used by TailSignature dummy) +// Free function overload used by visionHostFunction: accepts a dummy free +// function pointer whose parameter types (Rest...) are extracted by +// TailSignature and converted from JSI args. template std::tuple createArgsTupleFromJsi(void (*f)(Types...), const jsi::Value *args, @@ -87,27 +60,40 @@ std::tuple createArgsTupleFromJsi(void (*f)(Types...), runtime); } -// ========================================================================= -// 4. Tail Signature Helper (Crucial for Vision Functions) -// ========================================================================= +// Extracts arity, return type, and argument types from a member function +// pointer at compile time. Used by visionHostFunction to determine the expected +// JS argument count and invoke the correct return path. +template struct FunctionTraits; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; + +template +struct FunctionTraits { + static constexpr std::size_t arity = sizeof...(Args); + using return_type = R; + using args_tuple = std::tuple; +}; -// Extracts the "Tail" arguments of a function signature, skipping the first -// two arguments (Runtime and FrameValue). 
+// Strips the first two parameters (Runtime& and jsi::Value&) from a member +// function pointer and exposes the remaining types as a dummy free function. +// Used by visionHostFunction to parse only the tail JS args via +// createArgsTupleFromJsi, while frameData at args[0] is passed manually. template struct TailSignature; -// Non-const member function specialization template struct TailSignature { - // A dummy function that has the signature of just the "Rest" arguments. static void dummy(Rest...) {} }; -// Const member function specialization template struct TailSignature { static void dummy(Rest...) {} }; - } // namespace rnexecutorch::meta diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 3b31bc10f..51d206cdd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -84,15 +84,14 @@ cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported AHardwareBuffer format: %u", desc.format); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, - errorMessage); + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } @@ -106,7 +105,7 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw RnExecutorchError(RnExecutorchErrorCode::NotSupported, + throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, 
"NativeBuffer not supported on this platform"); #endif } From a9c01a9a7a05f41f38d98c7d9b59e847db5731e9 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:06:50 +0100 Subject: [PATCH 15/37] feat: add new PlatformNotSupported error --- .../common/rnexecutorch/ErrorCodes.h | 5 +++++ .../common/rnexecutorch/utils/FrameExtractor.cpp | 9 +++++---- .../react-native-executorch/src/errors/ErrorCodes.ts | 4 ++++ scripts/errors.config.ts | 4 ++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h index f4fd2e7f0..d49f3a175 100644 --- a/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h +++ b/packages/react-native-executorch/common/rnexecutorch/ErrorCodes.h @@ -75,6 +75,11 @@ enum class RnExecutorchErrorCode : int32_t { * interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current + * environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. 
*/ diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index 51d206cdd..baae35dc3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ -43,7 +43,7 @@ cv::Mat extractFromCVPixelBuffer(void *pixelBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported CVPixelBuffer format: %u", pixelFormat); - throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, errorMessage); } @@ -84,14 +84,15 @@ cv::Mat extractFromAHardwareBuffer(void *hardwareBuffer) { char errorMessage[100]; std::snprintf(errorMessage, sizeof(errorMessage), "Unsupported AHardwareBuffer format: %u", desc.format); - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, errorMessage); + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, + errorMessage); } // Note: We don't unlock here - Vision Camera manages the lifecycle return mat; #else - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, "AHardwareBuffer requires Android API 26+"); #endif // __ANDROID_API__ >= 26 } @@ -105,7 +106,7 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #elif defined(__ANDROID__) return extractFromAHardwareBuffer(reinterpret_cast(bufferPtr)); #else - throw RnExecutorchError(RnExecutorchErrorCode::UnknownError, + throw RnExecutorchError(RnExecutorchErrorCode::PlatformNotSupported, "NativeBuffer not supported on this platform"); #endif } diff --git a/packages/react-native-executorch/src/errors/ErrorCodes.ts b/packages/react-native-executorch/src/errors/ErrorCodes.ts index 3e4e557a1..4ccb1f07f 100644 --- a/packages/react-native-executorch/src/errors/ErrorCodes.ts 
+++ b/packages/react-native-executorch/src/errors/ErrorCodes.ts @@ -58,6 +58,10 @@ export enum RnExecutorchErrorCode { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted = 118, + /** + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported = 119, /** * Thrown when an error occurs with the tokenizer or tokenization process. */ diff --git a/scripts/errors.config.ts b/scripts/errors.config.ts index 3e6cf1090..6953eec2e 100644 --- a/scripts/errors.config.ts +++ b/scripts/errors.config.ts @@ -59,6 +59,10 @@ export const errorDefinitions = { * Thrown when the number of downloaded files is unexpected, due to download interruptions. */ DownloadInterrupted: 0x76, + /* + * Thrown when a feature or platform is not supported in the current environment. + */ + PlatformNotSupported: 0x77, /* * Thrown when an error occurs with the tokenizer or tokenization process. From 98395af46cf6634fee2ef6b1dc1dcb09da7cdb88 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 13:38:56 +0100 Subject: [PATCH 16/37] fix: compilation JSI error --- .../common/rnexecutorch/metaprogramming/FunctionHelpers.h | 3 ++- .../models/object_detection/ObjectDetection.cpp | 7 ++----- .../rnexecutorch/models/object_detection/ObjectDetection.h | 3 +-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h index 88bf04bf0..fde81e046 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/FunctionHelpers.h @@ -24,7 +24,8 @@ template std::tuple fillTupleFromArgs(std::index_sequence, const jsi::Value *args, jsi::Runtime &runtime) { - return 
std::make_tuple(jsi_conversion::getValue(args[I], runtime)...); + return std::tuple{ + jsi_conversion::getValue(args[I], runtime)...}; } /** diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 1ae2460c3..26e85da9c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -147,12 +147,9 @@ ObjectDetection::generateFromFrame(jsi::Runtime &runtime, } std::vector -ObjectDetection::generateFromPixels(jsi::Runtime &runtime, - const jsi::Value &pixelData, +ObjectDetection::generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold) { - auto tensorView = - jsi_conversion::getValue(pixelData, runtime); - cv::Mat image = extractFromPixels(tensorView); + cv::Mat image = extractFromPixels(pixelData); return runInference(image, detectionThreshold); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index bf231ff0b..d32eea95e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -26,8 +26,7 @@ class ObjectDetection : public VisionModel { generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold); [[nodiscard("Registered non-void function")]] std::vector - generateFromPixels(jsi::Runtime &runtime, const jsi::Value &pixelData, - double detectionThreshold); + generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold); protected: std::vector runInference(cv::Mat image, From 
ffcf72f6aa5e0fc46b4638c06b437983f26f8ac8 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 18:50:24 +0100 Subject: [PATCH 17/37] feat: add tests for generateFromPixels method --- .../app/object_detection/index.tsx | 133 ++---------------- .../object_detection/ObjectDetection.cpp | 4 + .../common/rnexecutorch/tests/CMakeLists.txt | 5 +- .../tests/integration/ObjectDetectionTest.cpp | 69 +++++++++ .../tests/integration/stubs/jsi_stubs.cpp | 8 ++ .../computer_vision/ObjectDetectionModule.ts | 9 +- .../src/types/objectDetection.ts | 4 +- 7 files changed, 103 insertions(+), 129 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index d843682eb..6a43dd920 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,18 +1,16 @@ import Spinner from '../../components/Spinner'; +import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, - ScalarType, - PixelData, } from 'react-native-executorch'; -import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; +import { View, StyleSheet, Image } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -import ColorPalette from '../../colors'; export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -44,59 +42,14 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - console.log('Running forward with string URI...'); - const output = await ssdLite.forward(imageUri, 0.5); - console.log('String URI result:', output.length, 'detections'); + const output 
= await ssdLite.forward(imageUri); setResults(output); } catch (e) { - console.error('Error in runForward:', e); + console.error(e); } } }; - const runForwardPixels = async () => { - try { - console.log('Testing with hardcoded pixel data...'); - - // Create a simple 320x320 test image (all zeros - black image) - // In a real scenario, you would load actual image pixel data here - const width = 320; - const height = 320; - const channels = 3; // RGB - - // Create a black image (you can replace this with actual pixel data) - const rgbData = new Uint8Array(width * height * channels); - - // Optionally, add some test pattern (e.g., white square in center) - for (let y = 100; y < 220; y++) { - for (let x = 100; x < 220; x++) { - const idx = (y * width + x) * 3; - rgbData[idx + 0] = 255; // R - rgbData[idx + 1] = 255; // G - rgbData[idx + 2] = 255; // B - } - } - - const pixelData: PixelData = { - dataPtr: rgbData, - sizes: [height, width, channels], - scalarType: ScalarType.BYTE, - }; - - console.log('Running forward with hardcoded pixel data...', { - sizes: pixelData.sizes, - dataSize: pixelData.dataPtr.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForwardPixels:', e); - } - }; - if (!ssdLite.isReady) { return ( - - {/* Custom bottom bar with two buttons */} - - - handleCameraPress(false)}> - πŸ“· Gallery - - - - - - Run (String) - - - - Run (Pixels) - - - + ); } @@ -207,43 +129,4 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, - bottomContainer: { - width: '100%', - gap: 15, - alignItems: 'center', - padding: 16, - flex: 1, - }, - bottomIconsContainer: { - flexDirection: 'row', - justifyContent: 'center', - width: '100%', - }, - iconText: { - fontSize: 16, - color: ColorPalette.primary, - }, - buttonsRow: { - flexDirection: 'row', - 
width: '100%', - gap: 10, - }, - button: { - height: 50, - justifyContent: 'center', - alignItems: 'center', - backgroundColor: ColorPalette.primary, - color: '#fff', - borderRadius: 8, - }, - halfButton: { - flex: 1, - }, - buttonDisabled: { - opacity: 0.5, - }, - buttonText: { - color: '#fff', - fontSize: 16, - }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 26e85da9c..2670cf9dd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -106,6 +106,10 @@ ObjectDetection::postprocess(const std::vector &tensors, std::vector ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { + if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "detectionThreshold must be in range [0, 1]"); + } std::scoped_lock lock(inference_mutex_); cv::Size originalSize = image.size(); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index e2a8c16bf..c45ab9107 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -156,8 +156,11 @@ add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/object_detection/ObjectDetection.cpp ${RNEXECUTORCH_DIR}/models/object_detection/Utils.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) 
add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index 074ee0751..93cdbf07c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -1,6 +1,8 @@ #include "BaseModelTests.h" +#include #include #include +#include #include #include @@ -115,6 +117,73 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { } } +// ============================================================================ +// generateFromPixels tests +// ============================================================================ +TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + auto results = model.generateFromPixels(tensorView, 0.3); + EXPECT_GE(results.size(), 0u); +} + +TEST(ObjectDetectionPixelTests, WrongSizesLengthThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + std::vector pixelData(16, 0); + JSTensorViewIn tensorView{ + pixelData.data(), {4, 4}, executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 4; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + 
executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, WrongScalarTypeThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 0); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Float}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 0.5), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, -0.1), + RnExecutorchError); +} + +TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { + ObjectDetection model(kValidObjectDetectionModelPath, nullptr); + constexpr int width = 4, height = 4, channels = 3; + std::vector pixelData(width * height * channels, 128); + JSTensorViewIn tensorView{pixelData.data(), + {height, width, channels}, + executorch::aten::ScalarType::Byte}; + EXPECT_THROW((void)model.generateFromPixels(tensorView, 1.1), + RnExecutorchError); +} + TEST(ObjectDetectionInheritedTests, GetInputShapeWorks) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); auto shape = model.getInputShape("forward", 0); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp index 39b8ae09c..897a2778e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp @@ -14,6 +14,14 @@ namespace facebook::jsi { MutableBuffer::~MutableBuffer() {} Value::~Value() {} Value::Value(Value &&other) noexcept {} + +// Needed to link ObjectDetectionTests: generateFromFrame and FrameProcessor +// pull in these JSI symbols, but they are never called in tests. +Object Value::asObject(Runtime &) const & { __builtin_unreachable(); } +BigInt Value::asBigInt(Runtime &) const & { __builtin_unreachable(); } + +uint64_t BigInt::asUint64(Runtime &) const { return 0; } + } // namespace facebook::jsi namespace facebook::react { diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 0818d9682..e62c7221c 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,5 +1,5 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; @@ -41,4 +41,11 @@ export class ObjectDetectionModule extends VisionModule { throw parseUnknownError(error); } } + + async forward( + input: string | PixelData, + detectionThreshold: number = 0.5 + ): Promise { + return super.forward(input, detectionThreshold); + } } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index c2281598a..11953c954 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ 
b/packages/react-native-executorch/src/types/objectDetection.ts @@ -179,7 +179,7 @@ export interface ObjectDetectionType { * **Note**: For VisionCamera frame processing, use `processFrame` instead. * * @param input - Image source (string or PixelData object) - * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.7. + * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. * @@ -225,7 +225,7 @@ export interface ObjectDetectionType { * ``` * * @param frame - VisionCamera Frame object - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.7. + * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.5. * @returns Array of Detection objects representing detected items in the frame. 
*/ runOnFrame: From 44676fc65a080c917117438530c5c081946bb1f7 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 23 Feb 2026 19:32:05 +0100 Subject: [PATCH 18/37] feat: add example screen with vision camera to computer vision app --- ...ative-vision-camera@npm-5.0.0-beta.1.patch | 713 ++++++++++++++++++ apps/computer-vision/app.json | 3 +- apps/computer-vision/app/_layout.tsx | 8 + apps/computer-vision/app/index.tsx | 6 + .../app/object_detection_live/index.tsx | 224 ++++++ apps/computer-vision/package.json | 5 +- .../src/types/objectDetection.ts | 4 +- yarn.lock | 47 +- 8 files changed, 995 insertions(+), 15 deletions(-) create mode 100644 .yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch create mode 100644 apps/computer-vision/app/object_detection_live/index.tsx diff --git a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch new file mode 100644 index 000000000..73f999e9a --- /dev/null +++ b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch @@ -0,0 +1,713 @@ +diff --git a/lib/expo-plugin/withVisionCamera.js b/lib/expo-plugin/withVisionCamera.js +index 32418a9..f7a8c5c 100644 +--- a/lib/expo-plugin/withVisionCamera.js ++++ b/lib/expo-plugin/withVisionCamera.js +@@ -1,4 +1,4 @@ +-import { AndroidConfig, withPlugins, } from '@expo/config-plugins'; ++const { AndroidConfig, withPlugins } = require('@expo/config-plugins'); + const CAMERA_USAGE = 'Allow $(PRODUCT_NAME) to access your camera'; + const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone'; + const withVisionCamera = (config, props = {}) => { +@@ -30,4 +30,4 @@ const withVisionCamera = (config, props = {}) => { + [AndroidConfig.Permissions.withPermissions, androidPermissions], + ]); + }; +-export default withVisionCamera; ++module.exports = withVisionCamera; +diff --git a/cpp/Frame Processors/HybridWorkletQueueFactory.cpp b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp 
+new file mode 100644 +index 0000000..5da4ef9 +--- /dev/null ++++ b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp +@@ -0,0 +1,50 @@ ++/// ++/// HybridWorkletQueueFactory.cpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#include "HybridWorkletQueueFactory.hpp" ++ ++#include "JSIConverter+AsyncQueue.hpp" ++#include "NativeThreadAsyncQueue.hpp" ++#include "NativeThreadDispatcher.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} ++ ++void HybridWorkletQueueFactory::loadHybridMethods() { ++ HybridWorkletQueueFactorySpec::loadHybridMethods(); ++ registerHybrids(this, [](Prototype& prototype) { ++ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); ++ }); ++} ++ ++std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { ++ return std::make_shared(thread); ++} ++ ++double HybridWorkletQueueFactory::getCurrentThreadMarker() { ++ static std::atomic_size_t threadCounter{1}; ++ static thread_local size_t thisThreadId{0}; ++ if (thisThreadId == 0) { ++ thisThreadId = threadCounter.fetch_add(1); ++ } ++ return static_cast(thisThreadId); ++} ++ ++jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { ++ if (count != 1) ++ throw std::runtime_error("installDispatcher(..) 
must be called with exactly 1 argument!"); ++ auto thread = JSIConverter>::fromJSI(runtime, args[0]); ++ ++ auto dispatcher = std::make_shared(thread); ++ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); ++ ++ return jsi::Value::undefined(); ++} ++ ++} // namespace margelo::nitro::camera +diff --git a/android/CMakeLists.txt b/android/CMakeLists.txt +index 0000000..1111111 100644 +--- a/android/CMakeLists.txt ++++ b/android/CMakeLists.txt +@@ -20,6 +20,7 @@ + "src/main/cpp" + "../cpp" + "../cpp/Frame Processors" ++ "../nitrogen/generated/shared/c++" + ) + + find_library(LOG_LIB log) +diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp +new file mode 100644 +index 0000000..5da4ef9 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp +@@ -0,0 +1,50 @@ ++/// ++/// HybridWorkletQueueFactory.cpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#include "HybridWorkletQueueFactory.hpp" ++ ++#include "JSIConverter+AsyncQueue.hpp" ++#include "NativeThreadAsyncQueue.hpp" ++#include "NativeThreadDispatcher.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} ++ ++void HybridWorkletQueueFactory::loadHybridMethods() { ++ HybridWorkletQueueFactorySpec::loadHybridMethods(); ++ registerHybrids(this, [](Prototype& prototype) { ++ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); ++ }); ++} ++ ++std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { ++ return std::make_shared(thread); ++} ++ ++double HybridWorkletQueueFactory::getCurrentThreadMarker() { ++ static std::atomic_size_t threadCounter{1}; ++ static thread_local size_t thisThreadId{0}; ++ if (thisThreadId == 0) { ++ thisThreadId = threadCounter.fetch_add(1); ++ } ++ return 
static_cast(thisThreadId); ++} ++ ++jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { ++ if (count != 1) ++ throw std::runtime_error("installDispatcher(..) must be called with exactly 1 argument!"); ++ auto thread = JSIConverter>::fromJSI(runtime, args[0]); ++ ++ auto dispatcher = std::make_shared(thread); ++ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); ++ ++ return jsi::Value::undefined(); ++} ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp +new file mode 100644 +index 0000000..daa16d2 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp +@@ -0,0 +1,29 @@ ++/// ++/// HybridWorkletQueueFactory.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridWorkletQueueFactorySpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++#include ++ ++namespace margelo::nitro::camera { ++ ++class HybridWorkletQueueFactory : public HybridWorkletQueueFactorySpec { ++public: ++ HybridWorkletQueueFactory(); ++ ++public: ++ std::shared_ptr wrapThreadInQueue(const std::shared_ptr& thread) override; ++ double getCurrentThreadMarker() override; ++ ++ jsi::Value installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count); ++ ++ void loadHybridMethods() override; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp +new file mode 100644 +index 0000000..5b93f2d +--- /dev/null ++++ b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp +@@ -0,0 +1,24 @@ ++/// ++/// JSIConverter+AsyncQueue.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include 
++#include ++#if __has_include() ++#include ++#elif __has_include() ++#include ++#else ++#error react-native-worklets Prefab not found! ++#endif ++ ++namespace margelo::nitro { ++ ++// JSIConverter> is implemented ++// in JSIConverter> ++ ++} +diff --git a/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp +new file mode 100644 +index 0000000..d5a0958 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp +@@ -0,0 +1,34 @@ ++/// ++/// NativeThreadAsyncQueue.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridNativeThreadSpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++ ++namespace margelo::nitro::camera { ++ ++/** ++ * An implementation of `worklets::AsyncQueue` that uses a `NativeThread` to run its jobs. ++ * ++ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, ++ * e.g. using `DispatchQueue` on iOS. 
++ */ ++class NativeThreadAsyncQueue : public worklets::AsyncQueue { ++public: ++ NativeThreadAsyncQueue(std::shared_ptr thread) : _thread(std::move(thread)) {} ++ ++ void push(std::function&& job) override { ++ auto jobCopy = job; ++ _thread->runOnThread(jobCopy); ++ } ++ ++private: ++ std::shared_ptr _thread; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp +new file mode 100644 +index 0000000..758d2f2 +--- /dev/null ++++ b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp +@@ -0,0 +1,36 @@ ++/// ++/// NativeThreadDispatcher.hpp ++/// VisionCamera ++/// Copyright Β© 2025 Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include "HybridNativeThreadSpec.hpp" ++#include "JSIConverter+AsyncQueue.hpp" ++#include ++ ++namespace margelo::nitro::camera { ++ ++/** ++ * An implementation of `nitro::Dispatcher` that uses a `NativeThread` to run its jobs. ++ * ++ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, ++ * e.g. using `DispatchQueue` on iOS. ++ */ ++class NativeThreadDispatcher : public nitro::Dispatcher { ++public: ++ NativeThreadDispatcher(std::shared_ptr thread) : _thread(std::move(thread)) {} ++ ++ void runSync(std::function&&) override { ++ throw std::runtime_error("runSync(...) 
is not implemented for NativeThreadDispatcher!"); ++ } ++ void runAsync(std::function&& function) override { ++ _thread->runOnThread(function); ++ } ++ ++private: ++ std::shared_ptr _thread; ++}; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt +new file mode 100644 +index 0000000..aaaaaaa +--- /dev/null ++++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt +@@ -0,0 +1,47 @@ ++/// ++/// BoundingBox.kt ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++package com.margelo.nitro.camera ++ ++import androidx.annotation.Keep ++import com.facebook.proguard.annotations.DoNotStrip ++ ++ ++/** ++ * Represents the JavaScript object/struct "BoundingBox". ++ */ ++@DoNotStrip ++@Keep ++data class BoundingBox( ++ @DoNotStrip ++ @Keep ++ val x: Double, ++ @DoNotStrip ++ @Keep ++ val y: Double, ++ @DoNotStrip ++ @Keep ++ val width: Double, ++ @DoNotStrip ++ @Keep ++ val height: Double ++) { ++ /* primary constructor */ ++ ++ companion object { ++ /** ++ * Constructor called from C++ ++ */ ++ @DoNotStrip ++ @Keep ++ @Suppress("unused") ++ @JvmStatic ++ private fun fromCpp(x: Double, y: Double, width: Double, height: Double): BoundingBox { ++ return BoundingBox(x, y, width, height) ++ } ++ } ++} +diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt +new file mode 100644 +index 0000000..bbbbbbb +--- /dev/null ++++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt +@@ -0,0 +1,60 @@ ++/// ++/// HybridScannedObjectSpec.kt ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++package com.margelo.nitro.camera ++ ++import androidx.annotation.Keep ++import com.facebook.jni.HybridData ++import com.facebook.proguard.annotations.DoNotStrip ++import com.margelo.nitro.core.HybridObject ++ ++/** ++ * A Kotlin class representing the ScannedObject HybridObject. ++ * Implement this abstract class to create Kotlin-based instances of ScannedObject. ++ */ ++@DoNotStrip ++@Keep ++@Suppress( ++ "KotlinJniMissingFunction", "unused", ++ "RedundantSuppression", "RedundantUnitReturnType", "SimpleRedundantLet", ++ "LocalVariableName", "PropertyName", "PrivatePropertyName", "FunctionName" ++) ++abstract class HybridScannedObjectSpec: HybridObject() { ++ @DoNotStrip ++ private var mHybridData: HybridData = initHybrid() ++ ++ init { ++ super.updateNative(mHybridData) ++ } ++ ++ override fun updateNative(hybridData: HybridData) { ++ mHybridData = hybridData ++ super.updateNative(hybridData) ++ } ++ ++ // Default implementation of `HybridObject.toString()` ++ override fun toString(): String { ++ return "[HybridObject ScannedObject]" ++ } ++ ++ // Properties ++ @get:DoNotStrip ++ @get:Keep ++ abstract val type: ScannedObjectType ++ ++ @get:DoNotStrip ++ @get:Keep ++ abstract val boundingBox: BoundingBox ++ ++ // Methods ++ ++ private external fun initHybrid(): HybridData ++ ++ companion object { ++ protected const val TAG = "HybridScannedObjectSpec" ++ } ++} +diff --git a/nitrogen/generated/android/c++/JBoundingBox.hpp b/nitrogen/generated/android/c++/JBoundingBox.hpp +new file mode 100644 +index 0000000..ccccccc +--- /dev/null ++++ b/nitrogen/generated/android/c++/JBoundingBox.hpp +@@ -0,0 +1,69 @@ ++/// ++/// JBoundingBox.hpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include ++#include "BoundingBox.hpp" ++ ++ ++ ++namespace margelo::nitro::camera { ++ ++ using namespace facebook; ++ ++ /** ++ * The C++ JNI bridge between the C++ struct "BoundingBox" and the Kotlin data class "BoundingBox". ++ */ ++ struct JBoundingBox final: public jni::JavaClass { ++ public: ++ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/BoundingBox;"; ++ ++ public: ++ /** ++ * Convert this Java/Kotlin-based struct to the C++ struct BoundingBox by copying all values to C++. ++ */ ++ [[maybe_unused]] ++ [[nodiscard]] ++ BoundingBox toCpp() const { ++ static const auto clazz = javaClassStatic(); ++ static const auto fieldX = clazz->getField("x"); ++ double x = this->getFieldValue(fieldX); ++ static const auto fieldY = clazz->getField("y"); ++ double y = this->getFieldValue(fieldY); ++ static const auto fieldWidth = clazz->getField("width"); ++ double width = this->getFieldValue(fieldWidth); ++ static const auto fieldHeight = clazz->getField("height"); ++ double height = this->getFieldValue(fieldHeight); ++ return BoundingBox( ++ x, ++ y, ++ width, ++ height ++ ); ++ } ++ ++ public: ++ /** ++ * Create a Java/Kotlin-based struct by copying all values from the given C++ struct to Java. 
++ */ ++ [[maybe_unused]] ++ static jni::local_ref fromCpp(const BoundingBox& value) { ++ using JSignature = JBoundingBox(double, double, double, double); ++ static const auto clazz = javaClassStatic(); ++ static const auto create = clazz->getStaticMethod("fromCpp"); ++ return create( ++ clazz, ++ value.x, ++ value.y, ++ value.width, ++ value.height ++ ); ++ } ++ }; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp +new file mode 100644 +index 0000000..ddddddd +--- /dev/null ++++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp +@@ -0,0 +1,63 @@ ++/// ++/// JHybridScannedObjectSpec.hpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#pragma once ++ ++#include ++#include ++#include "HybridScannedObjectSpec.hpp" ++ ++ ++ ++ ++namespace margelo::nitro::camera { ++ ++ using namespace facebook; ++ ++ class JHybridScannedObjectSpec: public jni::HybridClass, ++ public virtual HybridScannedObjectSpec { ++ public: ++ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/HybridScannedObjectSpec;"; ++ static jni::local_ref initHybrid(jni::alias_ref jThis); ++ static void registerNatives(); ++ ++ protected: ++ // C++ constructor (called from Java via `initHybrid()`) ++ explicit JHybridScannedObjectSpec(jni::alias_ref jThis) : ++ HybridObject(HybridScannedObjectSpec::TAG), ++ HybridBase(jThis), ++ _javaPart(jni::make_global(jThis)) {} ++ ++ public: ++ ~JHybridScannedObjectSpec() override { ++ // Hermes GC can destroy JS objects on a non-JNI Thread. 
++ jni::ThreadScope::WithClassLoader([&] { _javaPart.reset(); }); ++ } ++ ++ public: ++ size_t getExternalMemorySize() noexcept override; ++ bool equals(const std::shared_ptr& other) override; ++ void dispose() noexcept override; ++ std::string toString() override; ++ ++ public: ++ inline const jni::global_ref& getJavaPart() const noexcept { ++ return _javaPart; ++ } ++ ++ public: ++ // Properties ++ ScannedObjectType getType() override; ++ BoundingBox getBoundingBox() override; ++ ++ private: ++ friend HybridBase; ++ using HybridBase::HybridBase; ++ jni::global_ref _javaPart; ++ }; ++ ++} // namespace margelo::nitro::camera +diff --git a/nitrogen/generated/android/VisionCamera+autolinking.cmake b/nitrogen/generated/android/VisionCamera+autolinking.cmake +index 0000000..1111111 100644 +--- a/nitrogen/generated/android/VisionCamera+autolinking.cmake ++++ b/nitrogen/generated/android/VisionCamera+autolinking.cmake +@@ -112,3 +112,4 @@ + ../nitrogen/generated/android/c++/JHybridPreviewViewSpec.cpp + ../nitrogen/generated/android/c++/views/JHybridPreviewViewStateUpdater.cpp ++ ../nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp + ) +diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp +new file mode 100644 +index 0000000..eeeeeee +--- /dev/null ++++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp +@@ -0,0 +1,69 @@ ++/// ++/// JHybridScannedObjectSpec.cpp ++/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. ++/// https://github.com/mrousavy/nitro ++/// Copyright Β© Marc Rousavy @ Margelo ++/// ++ ++#include "JHybridScannedObjectSpec.hpp" ++ ++// Forward declaration of `ScannedObjectType` to properly resolve imports. ++namespace margelo::nitro::camera { enum class ScannedObjectType; } ++// Forward declaration of `BoundingBox` to properly resolve imports. 
++namespace margelo::nitro::camera { struct BoundingBox; } ++ ++#include "ScannedObjectType.hpp" ++#include "JScannedObjectType.hpp" ++#include "BoundingBox.hpp" ++#include "JBoundingBox.hpp" ++ ++namespace margelo::nitro::camera { ++ ++ jni::local_ref JHybridScannedObjectSpec::initHybrid(jni::alias_ref jThis) { ++ return makeCxxInstance(jThis); ++ } ++ ++ void JHybridScannedObjectSpec::registerNatives() { ++ registerHybrid({ ++ makeNativeMethod("initHybrid", JHybridScannedObjectSpec::initHybrid), ++ }); ++ } ++ ++ size_t JHybridScannedObjectSpec::getExternalMemorySize() noexcept { ++ static const auto method = javaClassStatic()->getMethod("getMemorySize"); ++ return method(_javaPart); ++ } ++ ++ bool JHybridScannedObjectSpec::equals(const std::shared_ptr& other) { ++ if (auto otherCast = std::dynamic_pointer_cast(other)) { ++ return _javaPart == otherCast->_javaPart; ++ } ++ return false; ++ } ++ ++ void JHybridScannedObjectSpec::dispose() noexcept { ++ static const auto method = javaClassStatic()->getMethod("dispose"); ++ method(_javaPart); ++ } ++ ++ std::string JHybridScannedObjectSpec::toString() { ++ static const auto method = javaClassStatic()->getMethod("toString"); ++ auto javaString = method(_javaPart); ++ return javaString->toStdString(); ++ } ++ ++ // Properties ++ ScannedObjectType JHybridScannedObjectSpec::getType() { ++ static const auto method = javaClassStatic()->getMethod()>("getType"); ++ auto __result = method(_javaPart); ++ return __result->toCpp(); ++ } ++ BoundingBox JHybridScannedObjectSpec::getBoundingBox() { ++ static const auto method = javaClassStatic()->getMethod()>("getBoundingBox"); ++ auto __result = method(_javaPart); ++ return __result->toCpp(); ++ } ++ ++ // Methods ++ ++} // namespace margelo::nitro::camera +diff --git a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt +index aaaaaaa..bbbbbbb 100644 
+--- a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt ++++ b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt +@@ -55,6 +55,6 @@ + when (event) { + is VideoRecordEvent.Start -> { +- promise.resolve() ++ promise.resolve(Unit) + didResolve = true + } + +@@ -98,27 +98,48 @@ + override fun stopRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.stop() +- this.isPaused = false +- this.recording = null +- this.recordedDuration = 0.0 +- this.recordedFileSize = 0.0 +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.stop() ++ this.isPaused = false ++ this.recording = null ++ this.recordedDuration = 0.0 ++ this.recordedFileSize = 0.0 ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + + override fun pauseRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.pause() +- this.isPaused = true +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.pause() ++ this.isPaused = true ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + + override fun resumeRecording(): Promise { +- return Promise.parallel(executor) { +- val recording = recording ?: throw Error("Not currently recording!") +- recording.resume() +- this.isPaused = false +- } ++ val promise = Promise() ++ executor.execute { ++ try { ++ val recording = recording ?: throw Error("Not currently recording!") ++ recording.resume() ++ this.isPaused = false ++ promise.resolve(Unit) ++ } catch (e: Throwable) { ++ promise.reject(e) ++ } ++ } ++ return promise + } + } diff 
--git a/apps/computer-vision/app.json b/apps/computer-vision/app.json index 4d68c039b..5db8c1390 100644 --- a/apps/computer-vision/app.json +++ b/apps/computer-vision/app.json @@ -25,7 +25,8 @@ "foregroundImage": "./assets/icons/adaptive-icon.png", "backgroundColor": "#ffffff" }, - "package": "com.anonymous.computervision" + "package": "com.anonymous.computervision", + "permissions": ["android.permission.CAMERA"] }, "web": { "favicon": "./assets/icons/favicon.png" diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 35fba7fb1..3970ac316 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -83,6 +83,14 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> + Object Detection + router.navigate('object_detection_live/')} + > + Object Detection Live + router.navigate('ocr/')} diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx new file mode 100644 index 000000000..68fdd4fed --- /dev/null +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -0,0 +1,224 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { + Detection, + SSDLITE_320_MOBILENET_V3_LARGE, + useObjectDetection, +} from 'react-native-executorch'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +export default function ObjectDetectionLiveScreen() { + const insets = 
useSafeAreaInsets(); + + const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(model.isGenerating); + }, [model.isGenerating, setGlobalGenerating]); + + const [detectionCount, setDetectionCount] = useState(0); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.Video); + } catch { + return undefined; + } + }, [device]); + + const updateStats = useCallback((results: Detection[]) => { + setDetectionCount(results.length); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!model.runOnFrame) { + frame.dispose(); + return; + } + try { + const result = model.runOnFrame(frame, 0.5); + if (result) { + scheduleOnRN(updateStats, result); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!model.isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + + + + {detectionCount} + objects + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + 
alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + + // Bottom stats bar + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index 3f47c357c..ee879c6af 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -31,13 +31,14 @@ "react-native-gesture-handler": "~2.28.0", "react-native-image-picker": "^7.2.2", "react-native-loading-spinner-overlay": "^3.0.1", - "react-native-nitro-image": "0.10.2", - "react-native-nitro-modules": "0.33.4", + "react-native-nitro-image": "^0.12.0", + "react-native-nitro-modules": "^0.33.9", "react-native-reanimated": "~4.2.1", "react-native-safe-area-context": "~5.6.0", "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", + "react-native-vision-camera": "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch", "react-native-worklets": "^0.7.2" }, "devDependencies": { diff --git 
a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 11953c954..9532c508f 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -225,10 +225,10 @@ export interface ObjectDetectionType { * ``` * * @param frame - VisionCamera Frame object - * @param detectionThreshold - The threshold for detection sensitivity. Default is 0.5. + * @param detectionThreshold - The threshold for detection sensitivity. * @returns Array of Detection objects representing detected items in the frame. */ runOnFrame: - | ((frame: Frame, detectionThreshold?: number) => Detection[]) + | ((frame: Frame, detectionThreshold: number) => Detection[]) | null; } diff --git a/yarn.lock b/yarn.lock index 3d2d9f7ee..d4316e786 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6585,13 +6585,14 @@ __metadata: react-native-gesture-handler: "npm:~2.28.0" react-native-image-picker: "npm:^7.2.2" react-native-loading-spinner-overlay: "npm:^3.0.1" - react-native-nitro-image: "npm:0.10.2" - react-native-nitro-modules: "npm:0.33.4" + react-native-nitro-image: "npm:^0.12.0" + react-native-nitro-modules: "npm:^0.33.9" react-native-reanimated: "npm:~4.2.1" react-native-safe-area-context: "npm:~5.6.0" react-native-screens: "npm:~4.16.0" react-native-svg: "npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" + react-native-vision-camera: "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch" react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -13394,24 +13395,24 @@ __metadata: languageName: node linkType: hard -"react-native-nitro-image@npm:0.10.2": - version: 0.10.2 - resolution: "react-native-nitro-image@npm:0.10.2" +"react-native-nitro-image@npm:^0.12.0": + version: 0.12.0 + resolution: "react-native-nitro-image@npm:0.12.0" peerDependencies: react: "*" 
react-native: "*" react-native-nitro-modules: "*" - checksum: 10/3be75e93da369adfe00441dae78171572dec38d3d7e75e5d4cb302b81479be9686c8d8dc0ea4b331514b8725099bf3eb069ab9933f7029627d12a72d71766cb4 + checksum: 10/03f165381c35e060d4d05eae3ce029b32a4009482f327e9526840f306181ca87a862b335e12667c55d4ee9f2069542ca93dd112feb7f1822bf7d2ddc38fe58f0 languageName: node linkType: hard -"react-native-nitro-modules@npm:0.33.4": - version: 0.33.4 - resolution: "react-native-nitro-modules@npm:0.33.4" +"react-native-nitro-modules@npm:^0.33.9": + version: 0.33.9 + resolution: "react-native-nitro-modules@npm:0.33.9" peerDependencies: react: "*" react-native: "*" - checksum: 10/a737ff6b142c55821688612305245fd10a7cff36f0ee66cad0956c6815a60cdd4ba64cdfba6137a6dbfe815645763ce5d406cf488876edd47dab7f8d0031e01a + checksum: 10/4ebf4db46d1e4987a0e52054724081aa9712bcd1d505a6dbdd47aebc6afe72a7abaa0e947651d9f3cc594e4eb3dba47fc6f59db27c5a5ed383946e40d96543a0 languageName: node linkType: hard @@ -13497,6 +13498,32 @@ __metadata: languageName: node linkType: hard +"react-native-vision-camera@npm:5.0.0-beta.1": + version: 5.0.0-beta.1 + resolution: "react-native-vision-camera@npm:5.0.0-beta.1" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-image: "*" + react-native-nitro-modules: "*" + react-native-worklets: "*" + checksum: 10/873410a33e33d68b162b6524997480133ef9b6469dce3f87253c371bba1643d326e835891b0c9f75018d376faf4aec23daba5ab729f431c718ecf901601a8d12 + languageName: node + linkType: hard + +"react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch": + version: 5.0.0-beta.1 + resolution: "react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch::version=5.0.0-beta.1&hash=b52326" + peerDependencies: + react: "*" + react-native: "*" + react-native-nitro-image: "*" + react-native-nitro-modules: "*" + 
react-native-worklets: "*" + checksum: 10/4ddf9325752243c92c5104b2fe8520d91072d4c359c52708872909b2bb85d136db59215bac1c6f902f04eee683a9d3d8ff11f7729e0468b00dee5aa3bb8f1944 + languageName: node + linkType: hard + "react-native-worklets@npm:0.5.1": version: 0.5.1 resolution: "react-native-worklets@npm:0.5.1" From 983242eb74dcea5e49a2759d105f0f02e4bda378 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 24 Feb 2026 09:02:14 +0100 Subject: [PATCH 19/37] feat: suggested changes / improve comments --- .../app/object_detection_live/index.tsx | 2 -- .../rnexecutorch/host_objects/JsiConversions.h | 18 +++++++++--------- .../host_objects/ModelHostObject.h | 6 ++++++ .../metaprogramming/TypeConcepts.h | 5 +++++ .../tests/integration/ObjectDetectionTest.cpp | 10 +++++----- .../computer_vision/ObjectDetectionModule.ts | 1 + .../modules/computer_vision/VisionModule.ts | 9 +++++++-- .../src/types/common.ts | 3 --- .../src/types/objectDetection.ts | 2 +- 9 files changed, 34 insertions(+), 22 deletions(-) diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index 68fdd4fed..5f8bb2120 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -182,8 +182,6 @@ const styles = StyleSheet.create({ fontWeight: '600', letterSpacing: 0.3, }, - - // Bottom stats bar bottomBarWrapper: { position: 'absolute', bottom: 0, diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 5fc8615ea..7b97108b9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -346,6 +346,15 @@ inline jsi::Value getJsiValue(const std::vector &vec, return {runtime, array}; } +inline jsi::Value 
getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + // Conditional as on android, size_t and uint64_t reduce to the same type, // introducing ambiguity template &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index abf920223..d6489c9be 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -46,6 +46,12 @@ template class ModelHostObject : public JsiHostObject { "getInputShape")); } + if constexpr (meta::HasGenerate) { + addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, + promiseHostFunction<&Model::generate>, + "generate")); + } + if constexpr (meta::HasEncode) { addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject, promiseHostFunction<&Model::encode>, diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index f625bf6e7..2d7612f25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -11,6 +11,11 @@ concept DerivedFromOrSameAs = std::is_base_of_v; template concept SameAs = std::is_same_v; +template +concept HasGenerate = requires(T t) { + 
{ &T::generate }; +}; + template concept HasGenerateFromString = requires(T t) { { &T::generateFromString }; diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp index 93cdbf07c..76c838ca1 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp @@ -122,7 +122,7 @@ TEST(ObjectDetectionGenerateTests, DetectionsHaveValidLabels) { // ============================================================================ TEST(ObjectDetectionPixelTests, ValidPixelDataReturnsResults) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -142,7 +142,7 @@ TEST(ObjectDetectionPixelTests, WrongSizesLengthThrows) { TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 4; + constexpr int32_t width = 4, height = 4, channels = 4; std::vector pixelData(width * height * channels, 0); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -153,7 +153,7 @@ TEST(ObjectDetectionPixelTests, WrongChannelCountThrows) { TEST(ObjectDetectionPixelTests, WrongScalarTypeThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 0); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -164,7 +164,7 @@ TEST(ObjectDetectionPixelTests, 
WrongScalarTypeThrows) { TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, @@ -175,7 +175,7 @@ TEST(ObjectDetectionPixelTests, NegativeThresholdThrows) { TEST(ObjectDetectionPixelTests, ThresholdAboveOneThrows) { ObjectDetection model(kValidObjectDetectionModelPath, nullptr); - constexpr int width = 4, height = 4, channels = 3; + constexpr int32_t width = 4, height = 4, channels = 3; std::vector pixelData(width * height * channels, 128); JSTensorViewIn tensorView{pixelData.data(), {height, width, channels}, diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index e62c7221c..f056cff62 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -28,6 +28,7 @@ export class ObjectDetectionModule extends VisionModule { onDownloadProgressCallback, model.modelSource ); + if (!paths?.[0]) { throw new RnExecutorchError( RnExecutorchErrorCode.DownloadInterrupted, diff --git a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts index eabe50ab0..762d09987 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts @@ -43,11 +43,16 @@ export abstract class VisionModule extends BaseModule { * const model = new ClassificationModule(); * await model.load({ modelSource: MODEL }); * + * // Use the 
functional form of setState to store the worklet β€” passing it + * // directly would cause React to invoke it immediately as an updater fn. + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => model.runOnFrame); + * * const frameOutput = useFrameOutput({ * onFrame(frame) { * 'worklet'; - * if (!model.runOnFrame) return; - * const result = model.runOnFrame(frame); + * if (!runOnFrame) return; + * const result = runOnFrame(frame); * frame.dispose(); * } * }); diff --git a/packages/react-native-executorch/src/types/common.ts b/packages/react-native-executorch/src/types/common.ts index 1ebfb3534..d992214dd 100644 --- a/packages/react-native-executorch/src/types/common.ts +++ b/packages/react-native-executorch/src/types/common.ts @@ -193,9 +193,6 @@ export interface PixelData extends Omit { /** * Frame data for vision model processing. - * Supports two modes: - * 1. ArrayBuffer mode (with memory copy) - Compatible with all platforms - * 2. NativeBuffer mode (zero-copy) - Better performance with Vision Camera v5 */ export interface Frame { /** diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 9532c508f..5aaf81833 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -176,7 +176,7 @@ export interface ObjectDetectionType { * 1. **String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) * - * **Note**: For VisionCamera frame processing, use `processFrame` instead. + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. * * @param input - Image source (string or PixelData object) * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. 
From e0e8bcafce494738f5fe0f762179b0e129d3911e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:16:42 +0100 Subject: [PATCH 20/37] fix(android): object detection not working on android --- ...ative-vision-camera@npm-5.0.0-beta.1.patch | 713 ------------------ apps/computer-vision/app.json | 13 +- .../app/object_detection_live/index.tsx | 3 +- apps/computer-vision/package.json | 2 +- yarn.lock | 23 +- 5 files changed, 20 insertions(+), 734 deletions(-) delete mode 100644 .yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch diff --git a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch b/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch deleted file mode 100644 index 73f999e9a..000000000 --- a/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch +++ /dev/null @@ -1,713 +0,0 @@ -diff --git a/lib/expo-plugin/withVisionCamera.js b/lib/expo-plugin/withVisionCamera.js -index 32418a9..f7a8c5c 100644 ---- a/lib/expo-plugin/withVisionCamera.js -+++ b/lib/expo-plugin/withVisionCamera.js -@@ -1,4 +1,4 @@ --import { AndroidConfig, withPlugins, } from '@expo/config-plugins'; -+const { AndroidConfig, withPlugins } = require('@expo/config-plugins'); - const CAMERA_USAGE = 'Allow $(PRODUCT_NAME) to access your camera'; - const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone'; - const withVisionCamera = (config, props = {}) => { -@@ -30,4 +30,4 @@ const withVisionCamera = (config, props = {}) => { - [AndroidConfig.Permissions.withPermissions, androidPermissions], - ]); - }; --export default withVisionCamera; -+module.exports = withVisionCamera; -diff --git a/cpp/Frame Processors/HybridWorkletQueueFactory.cpp b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp -new file mode 100644 -index 0000000..5da4ef9 ---- /dev/null -+++ b/cpp/Frame Processors/HybridWorkletQueueFactory.cpp -@@ -0,0 +1,50 @@ -+/// -+/// HybridWorkletQueueFactory.cpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc 
Rousavy @ Margelo -+/// -+ -+#include "HybridWorkletQueueFactory.hpp" -+ -+#include "JSIConverter+AsyncQueue.hpp" -+#include "NativeThreadAsyncQueue.hpp" -+#include "NativeThreadDispatcher.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} -+ -+void HybridWorkletQueueFactory::loadHybridMethods() { -+ HybridWorkletQueueFactorySpec::loadHybridMethods(); -+ registerHybrids(this, [](Prototype& prototype) { -+ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); -+ }); -+} -+ -+std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { -+ return std::make_shared(thread); -+} -+ -+double HybridWorkletQueueFactory::getCurrentThreadMarker() { -+ static std::atomic_size_t threadCounter{1}; -+ static thread_local size_t thisThreadId{0}; -+ if (thisThreadId == 0) { -+ thisThreadId = threadCounter.fetch_add(1); -+ } -+ return static_cast(thisThreadId); -+} -+ -+jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { -+ if (count != 1) -+ throw std::runtime_error("installDispatcher(..) 
must be called with exactly 1 argument!"); -+ auto thread = JSIConverter>::fromJSI(runtime, args[0]); -+ -+ auto dispatcher = std::make_shared(thread); -+ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); -+ -+ return jsi::Value::undefined(); -+} -+ -+} // namespace margelo::nitro::camera -diff --git a/android/CMakeLists.txt b/android/CMakeLists.txt -index 0000000..1111111 100644 ---- a/android/CMakeLists.txt -+++ b/android/CMakeLists.txt -@@ -20,6 +20,7 @@ - "src/main/cpp" - "../cpp" - "../cpp/Frame Processors" -+ "../nitrogen/generated/shared/c++" - ) - - find_library(LOG_LIB log) -diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp -new file mode 100644 -index 0000000..5da4ef9 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.cpp -@@ -0,0 +1,50 @@ -+/// -+/// HybridWorkletQueueFactory.cpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#include "HybridWorkletQueueFactory.hpp" -+ -+#include "JSIConverter+AsyncQueue.hpp" -+#include "NativeThreadAsyncQueue.hpp" -+#include "NativeThreadDispatcher.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+HybridWorkletQueueFactory::HybridWorkletQueueFactory() : HybridObject(TAG) {} -+ -+void HybridWorkletQueueFactory::loadHybridMethods() { -+ HybridWorkletQueueFactorySpec::loadHybridMethods(); -+ registerHybrids(this, [](Prototype& prototype) { -+ prototype.registerRawHybridMethod("installDispatcher", 1, &HybridWorkletQueueFactory::installDispatcher); -+ }); -+} -+ -+std::shared_ptr HybridWorkletQueueFactory::wrapThreadInQueue(const std::shared_ptr& thread) { -+ return std::make_shared(thread); -+} -+ -+double HybridWorkletQueueFactory::getCurrentThreadMarker() { -+ static std::atomic_size_t threadCounter{1}; -+ static thread_local size_t thisThreadId{0}; -+ if (thisThreadId == 0) { -+ thisThreadId = threadCounter.fetch_add(1); -+ } -+ return 
static_cast(thisThreadId); -+} -+ -+jsi::Value HybridWorkletQueueFactory::installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count) { -+ if (count != 1) -+ throw std::runtime_error("installDispatcher(..) must be called with exactly 1 argument!"); -+ auto thread = JSIConverter>::fromJSI(runtime, args[0]); -+ -+ auto dispatcher = std::make_shared(thread); -+ Dispatcher::installRuntimeGlobalDispatcher(runtime, dispatcher); -+ -+ return jsi::Value::undefined(); -+} -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp -new file mode 100644 -index 0000000..daa16d2 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/HybridWorkletQueueFactory.hpp -@@ -0,0 +1,29 @@ -+/// -+/// HybridWorkletQueueFactory.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridWorkletQueueFactorySpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+#include -+ -+namespace margelo::nitro::camera { -+ -+class HybridWorkletQueueFactory : public HybridWorkletQueueFactorySpec { -+public: -+ HybridWorkletQueueFactory(); -+ -+public: -+ std::shared_ptr wrapThreadInQueue(const std::shared_ptr& thread) override; -+ double getCurrentThreadMarker() override; -+ -+ jsi::Value installDispatcher(jsi::Runtime& runtime, const jsi::Value&, const jsi::Value* args, size_t count); -+ -+ void loadHybridMethods() override; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp -new file mode 100644 -index 0000000..5b93f2d ---- /dev/null -+++ b/nitrogen/generated/shared/c++/JSIConverter+AsyncQueue.hpp -@@ -0,0 +1,24 @@ -+/// -+/// JSIConverter+AsyncQueue.swift -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include 
-+#include -+#if __has_include() -+#include -+#elif __has_include() -+#include -+#else -+#error react-native-worklets Prefab not found! -+#endif -+ -+namespace margelo::nitro { -+ -+// JSIConverter> is implemented -+// in JSIConverter> -+ -+} -diff --git a/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp -new file mode 100644 -index 0000000..d5a0958 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/NativeThreadAsyncQueue.hpp -@@ -0,0 +1,34 @@ -+/// -+/// NativeThreadAsyncQueue.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridNativeThreadSpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+ -+namespace margelo::nitro::camera { -+ -+/** -+ * An implementation of `worklets::AsyncQueue` that uses a `NativeThread` to run its jobs. -+ * -+ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, -+ * e.g. using `DispatchQueue` on iOS. 
-+ */ -+class NativeThreadAsyncQueue : public worklets::AsyncQueue { -+public: -+ NativeThreadAsyncQueue(std::shared_ptr thread) : _thread(std::move(thread)) {} -+ -+ void push(std::function&& job) override { -+ auto jobCopy = job; -+ _thread->runOnThread(jobCopy); -+ } -+ -+private: -+ std::shared_ptr _thread; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp -new file mode 100644 -index 0000000..758d2f2 ---- /dev/null -+++ b/nitrogen/generated/shared/c++/NativeThreadDispatcher.hpp -@@ -0,0 +1,36 @@ -+/// -+/// NativeThreadDispatcher.hpp -+/// VisionCamera -+/// Copyright Β© 2025 Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include "HybridNativeThreadSpec.hpp" -+#include "JSIConverter+AsyncQueue.hpp" -+#include -+ -+namespace margelo::nitro::camera { -+ -+/** -+ * An implementation of `nitro::Dispatcher` that uses a `NativeThread` to run its jobs. -+ * -+ * The `NativeThread` (`HybridNativeThreadSpec`) is a platform-implemented object, -+ * e.g. using `DispatchQueue` on iOS. -+ */ -+class NativeThreadDispatcher : public nitro::Dispatcher { -+public: -+ NativeThreadDispatcher(std::shared_ptr thread) : _thread(std::move(thread)) {} -+ -+ void runSync(std::function&&) override { -+ throw std::runtime_error("runSync(...) 
is not implemented for NativeThreadDispatcher!"); -+ } -+ void runAsync(std::function&& function) override { -+ _thread->runOnThread(function); -+ } -+ -+private: -+ std::shared_ptr _thread; -+}; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt -new file mode 100644 -index 0000000..aaaaaaa ---- /dev/null -+++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/BoundingBox.kt -@@ -0,0 +1,47 @@ -+/// -+/// BoundingBox.kt -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+package com.margelo.nitro.camera -+ -+import androidx.annotation.Keep -+import com.facebook.proguard.annotations.DoNotStrip -+ -+ -+/** -+ * Represents the JavaScript object/struct "BoundingBox". -+ */ -+@DoNotStrip -+@Keep -+data class BoundingBox( -+ @DoNotStrip -+ @Keep -+ val x: Double, -+ @DoNotStrip -+ @Keep -+ val y: Double, -+ @DoNotStrip -+ @Keep -+ val width: Double, -+ @DoNotStrip -+ @Keep -+ val height: Double -+) { -+ /* primary constructor */ -+ -+ companion object { -+ /** -+ * Constructor called from C++ -+ */ -+ @DoNotStrip -+ @Keep -+ @Suppress("unused") -+ @JvmStatic -+ private fun fromCpp(x: Double, y: Double, width: Double, height: Double): BoundingBox { -+ return BoundingBox(x, y, width, height) -+ } -+ } -+} -diff --git a/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt -new file mode 100644 -index 0000000..bbbbbbb ---- /dev/null -+++ b/nitrogen/generated/android/kotlin/com/margelo/nitro/camera/HybridScannedObjectSpec.kt -@@ -0,0 +1,60 @@ -+/// -+/// HybridScannedObjectSpec.kt -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
-+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+package com.margelo.nitro.camera -+ -+import androidx.annotation.Keep -+import com.facebook.jni.HybridData -+import com.facebook.proguard.annotations.DoNotStrip -+import com.margelo.nitro.core.HybridObject -+ -+/** -+ * A Kotlin class representing the ScannedObject HybridObject. -+ * Implement this abstract class to create Kotlin-based instances of ScannedObject. -+ */ -+@DoNotStrip -+@Keep -+@Suppress( -+ "KotlinJniMissingFunction", "unused", -+ "RedundantSuppression", "RedundantUnitReturnType", "SimpleRedundantLet", -+ "LocalVariableName", "PropertyName", "PrivatePropertyName", "FunctionName" -+) -+abstract class HybridScannedObjectSpec: HybridObject() { -+ @DoNotStrip -+ private var mHybridData: HybridData = initHybrid() -+ -+ init { -+ super.updateNative(mHybridData) -+ } -+ -+ override fun updateNative(hybridData: HybridData) { -+ mHybridData = hybridData -+ super.updateNative(hybridData) -+ } -+ -+ // Default implementation of `HybridObject.toString()` -+ override fun toString(): String { -+ return "[HybridObject ScannedObject]" -+ } -+ -+ // Properties -+ @get:DoNotStrip -+ @get:Keep -+ abstract val type: ScannedObjectType -+ -+ @get:DoNotStrip -+ @get:Keep -+ abstract val boundingBox: BoundingBox -+ -+ // Methods -+ -+ private external fun initHybrid(): HybridData -+ -+ companion object { -+ protected const val TAG = "HybridScannedObjectSpec" -+ } -+} -diff --git a/nitrogen/generated/android/c++/JBoundingBox.hpp b/nitrogen/generated/android/c++/JBoundingBox.hpp -new file mode 100644 -index 0000000..ccccccc ---- /dev/null -+++ b/nitrogen/generated/android/c++/JBoundingBox.hpp -@@ -0,0 +1,69 @@ -+/// -+/// JBoundingBox.hpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. 
-+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include -+#include "BoundingBox.hpp" -+ -+ -+ -+namespace margelo::nitro::camera { -+ -+ using namespace facebook; -+ -+ /** -+ * The C++ JNI bridge between the C++ struct "BoundingBox" and the the Kotlin data class "BoundingBox". -+ */ -+ struct JBoundingBox final: public jni::JavaClass { -+ public: -+ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/BoundingBox;"; -+ -+ public: -+ /** -+ * Convert this Java/Kotlin-based struct to the C++ struct BoundingBox by copying all values to C++. -+ */ -+ [[maybe_unused]] -+ [[nodiscard]] -+ BoundingBox toCpp() const { -+ static const auto clazz = javaClassStatic(); -+ static const auto fieldX = clazz->getField("x"); -+ double x = this->getFieldValue(fieldX); -+ static const auto fieldY = clazz->getField("y"); -+ double y = this->getFieldValue(fieldY); -+ static const auto fieldWidth = clazz->getField("width"); -+ double width = this->getFieldValue(fieldWidth); -+ static const auto fieldHeight = clazz->getField("height"); -+ double height = this->getFieldValue(fieldHeight); -+ return BoundingBox( -+ x, -+ y, -+ width, -+ height -+ ); -+ } -+ -+ public: -+ /** -+ * Create a Java/Kotlin-based struct by copying all values from the given C++ struct to Java. 
-+ */ -+ [[maybe_unused]] -+ static jni::local_ref fromCpp(const BoundingBox& value) { -+ using JSignature = JBoundingBox(double, double, double, double); -+ static const auto clazz = javaClassStatic(); -+ static const auto create = clazz->getStaticMethod("fromCpp"); -+ return create( -+ clazz, -+ value.x, -+ value.y, -+ value.width, -+ value.height -+ ); -+ } -+ }; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp -new file mode 100644 -index 0000000..ddddddd ---- /dev/null -+++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.hpp -@@ -0,0 +1,63 @@ -+/// -+/// JHybridScannedObjectSpec.hpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#pragma once -+ -+#include -+#include -+#include "HybridScannedObjectSpec.hpp" -+ -+ -+ -+ -+namespace margelo::nitro::camera { -+ -+ using namespace facebook; -+ -+ class JHybridScannedObjectSpec: public jni::HybridClass, -+ public virtual HybridScannedObjectSpec { -+ public: -+ static auto constexpr kJavaDescriptor = "Lcom/margelo/nitro/camera/HybridScannedObjectSpec;"; -+ static jni::local_ref initHybrid(jni::alias_ref jThis); -+ static void registerNatives(); -+ -+ protected: -+ // C++ constructor (called from Java via `initHybrid()`) -+ explicit JHybridScannedObjectSpec(jni::alias_ref jThis) : -+ HybridObject(HybridScannedObjectSpec::TAG), -+ HybridBase(jThis), -+ _javaPart(jni::make_global(jThis)) {} -+ -+ public: -+ ~JHybridScannedObjectSpec() override { -+ // Hermes GC can destroy JS objects on a non-JNI Thread. 
-+ jni::ThreadScope::WithClassLoader([&] { _javaPart.reset(); }); -+ } -+ -+ public: -+ size_t getExternalMemorySize() noexcept override; -+ bool equals(const std::shared_ptr& other) override; -+ void dispose() noexcept override; -+ std::string toString() override; -+ -+ public: -+ inline const jni::global_ref& getJavaPart() const noexcept { -+ return _javaPart; -+ } -+ -+ public: -+ // Properties -+ ScannedObjectType getType() override; -+ BoundingBox getBoundingBox() override; -+ -+ private: -+ friend HybridBase; -+ using HybridBase::HybridBase; -+ jni::global_ref _javaPart; -+ }; -+ -+} // namespace margelo::nitro::camera -diff --git a/nitrogen/generated/android/VisionCamera+autolinking.cmake b/nitrogen/generated/android/VisionCamera+autolinking.cmake -index 0000000..1111111 100644 ---- a/nitrogen/generated/android/VisionCamera+autolinking.cmake -+++ b/nitrogen/generated/android/VisionCamera+autolinking.cmake -@@ -112,3 +112,4 @@ - ../nitrogen/generated/android/c++/JHybridPreviewViewSpec.cpp - ../nitrogen/generated/android/c++/views/JHybridPreviewViewStateUpdater.cpp -+ ../nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp - ) -diff --git a/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp -new file mode 100644 -index 0000000..eeeeeee ---- /dev/null -+++ b/nitrogen/generated/android/c++/JHybridScannedObjectSpec.cpp -@@ -0,0 +1,69 @@ -+/// -+/// JHybridScannedObjectSpec.cpp -+/// This file was generated by nitrogen. DO NOT MODIFY THIS FILE. -+/// https://github.com/mrousavy/nitro -+/// Copyright Β© Marc Rousavy @ Margelo -+/// -+ -+#include "JHybridScannedObjectSpec.hpp" -+ -+// Forward declaration of `ScannedObjectType` to properly resolve imports. -+namespace margelo::nitro::camera { enum class ScannedObjectType; } -+// Forward declaration of `BoundingBox` to properly resolve imports. 
-+namespace margelo::nitro::camera { struct BoundingBox; } -+ -+#include "ScannedObjectType.hpp" -+#include "JScannedObjectType.hpp" -+#include "BoundingBox.hpp" -+#include "JBoundingBox.hpp" -+ -+namespace margelo::nitro::camera { -+ -+ jni::local_ref JHybridScannedObjectSpec::initHybrid(jni::alias_ref jThis) { -+ return makeCxxInstance(jThis); -+ } -+ -+ void JHybridScannedObjectSpec::registerNatives() { -+ registerHybrid({ -+ makeNativeMethod("initHybrid", JHybridScannedObjectSpec::initHybrid), -+ }); -+ } -+ -+ size_t JHybridScannedObjectSpec::getExternalMemorySize() noexcept { -+ static const auto method = javaClassStatic()->getMethod("getMemorySize"); -+ return method(_javaPart); -+ } -+ -+ bool JHybridScannedObjectSpec::equals(const std::shared_ptr& other) { -+ if (auto otherCast = std::dynamic_pointer_cast(other)) { -+ return _javaPart == otherCast->_javaPart; -+ } -+ return false; -+ } -+ -+ void JHybridScannedObjectSpec::dispose() noexcept { -+ static const auto method = javaClassStatic()->getMethod("dispose"); -+ method(_javaPart); -+ } -+ -+ std::string JHybridScannedObjectSpec::toString() { -+ static const auto method = javaClassStatic()->getMethod("toString"); -+ auto javaString = method(_javaPart); -+ return javaString->toStdString(); -+ } -+ -+ // Properties -+ ScannedObjectType JHybridScannedObjectSpec::getType() { -+ static const auto method = javaClassStatic()->getMethod()>("getType"); -+ auto __result = method(_javaPart); -+ return __result->toCpp(); -+ } -+ BoundingBox JHybridScannedObjectSpec::getBoundingBox() { -+ static const auto method = javaClassStatic()->getMethod()>("getBoundingBox"); -+ auto __result = method(_javaPart); -+ return __result->toCpp(); -+ } -+ -+ // Methods -+ -+} // namespace margelo::nitro::camera -diff --git a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -index aaaaaaa..bbbbbbb 100644 
---- a/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -+++ b/android/src/main/java/com/margelo/nitro/camera/hybrids/recording/HybridVideoRecorder.kt -@@ -55,6 +55,6 @@ - when (event) { - is VideoRecordEvent.Start -> { -- promise.resolve() -+ promise.resolve(Unit) - didResolve = true - } - -@@ -98,27 +98,48 @@ - override fun stopRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.stop() -- this.isPaused = false -- this.recording = null -- this.recordedDuration = 0.0 -- this.recordedFileSize = 0.0 -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.stop() -+ this.isPaused = false -+ this.recording = null -+ this.recordedDuration = 0.0 -+ this.recordedFileSize = 0.0 -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - - override fun pauseRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.pause() -- this.isPaused = true -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.pause() -+ this.isPaused = true -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - - override fun resumeRecording(): Promise { -- return Promise.parallel(executor) { -- val recording = recording ?: throw Error("Not currently recording!") -- recording.resume() -- this.isPaused = false -- } -+ val promise = Promise() -+ executor.execute { -+ try { -+ val recording = recording ?: throw Error("Not currently recording!") -+ recording.resume() -+ this.isPaused = false -+ promise.resolve(Unit) -+ } catch (e: Throwable) { -+ promise.reject(e) -+ } -+ } -+ return promise - } - } diff 
--git a/apps/computer-vision/app.json b/apps/computer-vision/app.json index 5db8c1390..4fcbca2ce 100644 --- a/apps/computer-vision/app.json +++ b/apps/computer-vision/app.json @@ -31,6 +31,17 @@ "web": { "favicon": "./assets/icons/favicon.png" }, - "plugins": ["expo-font", "expo-router"] + "plugins": [ + "expo-font", + "expo-router", + [ + "expo-build-properties", + { + "android": { + "minSdkVersion": 26 + } + } + ] + ] } } diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index 5f8bb2120..cd1e9cca8 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -54,7 +54,7 @@ export default function ObjectDetectionLiveScreen() { const format = useMemo(() => { if (device == null) return undefined; try { - return getCameraFormat(device, Templates.Video); + return getCameraFormat(device, Templates.FrameProcessing); } catch { return undefined; } @@ -72,6 +72,7 @@ export default function ObjectDetectionLiveScreen() { const frameOutput = useFrameOutput({ pixelFormat: 'rgb', + dropFramesWhileBusy: true, onFrame(frame) { 'worklet'; if (!model.runOnFrame) { diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json index ee879c6af..328e9bc4f 100644 --- a/apps/computer-vision/package.json +++ b/apps/computer-vision/package.json @@ -38,7 +38,7 @@ "react-native-screens": "~4.16.0", "react-native-svg": "15.12.1", "react-native-svg-transformer": "^1.5.0", - "react-native-vision-camera": "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch", + "react-native-vision-camera": "5.0.0-beta.2", "react-native-worklets": "^0.7.2" }, "devDependencies": { diff --git a/yarn.lock b/yarn.lock index d4316e786..ac9b276b6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6592,7 +6592,7 @@ __metadata: react-native-screens: "npm:~4.16.0" react-native-svg: 
"npm:15.12.1" react-native-svg-transformer: "npm:^1.5.0" - react-native-vision-camera: "patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch" + react-native-vision-camera: "npm:5.0.0-beta.2" react-native-worklets: "npm:^0.7.2" languageName: unknown linkType: soft @@ -13498,29 +13498,16 @@ __metadata: languageName: node linkType: hard -"react-native-vision-camera@npm:5.0.0-beta.1": - version: 5.0.0-beta.1 - resolution: "react-native-vision-camera@npm:5.0.0-beta.1" +"react-native-vision-camera@npm:5.0.0-beta.2": + version: 5.0.0-beta.2 + resolution: "react-native-vision-camera@npm:5.0.0-beta.2" peerDependencies: react: "*" react-native: "*" react-native-nitro-image: "*" react-native-nitro-modules: "*" react-native-worklets: "*" - checksum: 10/873410a33e33d68b162b6524997480133ef9b6469dce3f87253c371bba1643d326e835891b0c9f75018d376faf4aec23daba5ab729f431c718ecf901601a8d12 - languageName: node - linkType: hard - -"react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch": - version: 5.0.0-beta.1 - resolution: "react-native-vision-camera@patch:react-native-vision-camera@npm%3A5.0.0-beta.1#~/.yarn/patches/react-native-vision-camera@npm-5.0.0-beta.1.patch::version=5.0.0-beta.1&hash=b52326" - peerDependencies: - react: "*" - react-native: "*" - react-native-nitro-image: "*" - react-native-nitro-modules: "*" - react-native-worklets: "*" - checksum: 10/4ddf9325752243c92c5104b2fe8520d91072d4c359c52708872909b2bb85d136db59215bac1c6f902f04eee683a9d3d8ff11f7729e0468b00dee5aa3bb8f1944 + checksum: 10/1f38d097d001c10b8544d0b931a9387a91c5df1e0677ae53e639962a90589586af02ca658ca5e99a5ca179af8d86bc8365227cf70750f2df4bfb775f4a26fc6d languageName: node linkType: hard From 3aa0f899fe663718c31689214b409b86eacffb93 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:19:49 +0100 Subject: [PATCH 21/37] chore: remove 
unused ImageSegmentation.cpp --- .../image_segmentation/ImageSegmentation.cpp | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp deleted file mode 100644 index a2c1ae865..000000000 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#include "ImageSegmentation.h" - -#include - -#include -#include -#include -#include -#include -#include - -namespace rnexecutorch::models::image_segmentation { - -ImageSegmentation::ImageSegmentation( - const std::string &modelSource, - std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { - auto inputShapes = getAllInputShapes(); - if (inputShapes.size() == 0) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - std::vector modelInputShape = inputShapes[0]; - if (modelInputShape.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimentions " - "but got: %zu.", - modelInputShape.size()); - throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions, - errorMessage); - } - modelImageSize = cv::Size(modelInputShape[modelInputShape.size() - 1], - modelInputShape[modelInputShape.size() - 2]); - numModelPixels = modelImageSize.area(); -} - -std::shared_ptr ImageSegmentation::generate( - std::string imageSource, - std::set> classesOfInterest, bool resize) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); - - auto forwardResult = BaseModel::forward(inputTensor); - if 
(!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); - } - - return postprocess(forwardResult->at(0).toTensor(), originalSize, - classesOfInterest, resize); -} - -std::shared_ptr ImageSegmentation::postprocess( - const Tensor &tensor, cv::Size originalSize, - std::set> classesOfInterest, bool resize) { - - auto dataPtr = static_cast(tensor.const_data_ptr()); - auto resultData = std::span(dataPtr, tensor.numel()); - - // We copy the ET-owned data to jsi array buffers that can be directly - // returned to JS - std::vector> resultClasses; - resultClasses.reserve(numClasses); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - auto classBuffer = std::make_shared( - &resultData[cl * numModelPixels], numModelPixels * sizeof(float)); - resultClasses.push_back(classBuffer); - } - - // Apply softmax per each pixel across all classes - for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { - std::vector classValues(numClasses); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - classValues[cl] = - reinterpret_cast(resultClasses[cl]->data())[pixel]; - } - numerical::softmax(classValues); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - reinterpret_cast(resultClasses[cl]->data())[pixel] = - classValues[cl]; - } - } - - // Calculate the maximum class for each pixel - auto argmax = - std::make_shared(numModelPixels * sizeof(int32_t)); - for (std::size_t pixel = 0; pixel < numModelPixels; ++pixel) { - float max = reinterpret_cast(resultClasses[0]->data())[pixel]; - int maxInd = 0; - for (int cl = 1; cl < numClasses; ++cl) { - if (reinterpret_cast(resultClasses[cl]->data())[pixel] > max) { - maxInd = cl; - max = reinterpret_cast(resultClasses[cl]->data())[pixel]; - } - } - reinterpret_cast(argmax->data())[pixel] = maxInd; - } - - auto buffersToReturn = std::make_shared>>(); - for (std::size_t cl = 0; cl < numClasses; ++cl) { - if 
(classesOfInterest.contains(constants::kDeeplabV3Resnet50Labels[cl])) { - (*buffersToReturn)[constants::kDeeplabV3Resnet50Labels[cl]] = - resultClasses[cl]; - } - } - - // Resize selected classes and argmax - if (resize) { - cv::Mat argmaxMat(modelImageSize, CV_32SC1, argmax->data()); - cv::resize(argmaxMat, argmaxMat, originalSize, 0, 0, - cv::InterpolationFlags::INTER_NEAREST); - argmax = std::make_shared( - argmaxMat.data, originalSize.area() * sizeof(int32_t)); - - for (auto &[label, arrayBuffer] : *buffersToReturn) { - cv::Mat classMat(modelImageSize, CV_32FC1, arrayBuffer->data()); - cv::resize(classMat, classMat, originalSize); - arrayBuffer = std::make_shared( - classMat.data, originalSize.area() * sizeof(float)); - } - } - return populateDictionary(argmax, buffersToReturn); -} - -std::shared_ptr ImageSegmentation::populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput) { - // Synchronize the invoked thread to return when the dict is constructed - auto promisePtr = std::make_shared>(); - std::future doneFuture = promisePtr->get_future(); - - std::shared_ptr dictPtr = nullptr; - callInvoker->invokeAsync( - [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { - dictPtr = std::make_shared(runtime); - auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); - - auto int32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Int32Array"); - auto int32Array = - int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) - .getObject(runtime); - dictPtr->setProperty(runtime, "ARGMAX", int32Array); - - for (auto &[classLabel, owningBuffer] : *classesToOutput) { - auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); - - auto float32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) - .getObject(runtime); - - dictPtr->setProperty( - runtime, jsi::String::createFromAscii(runtime, 
classLabel.data()), - float32Array); - } - promisePtr->set_value(); - }); - - doneFuture.wait(); - return dictPtr; -} - -} // namespace rnexecutorch::models::image_segmentation From fd5aca74ea23a6699e3c9e9ca8eff543bc5f3e4d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 14:38:39 +0100 Subject: [PATCH 22/37] docs: add correct api references --- .../classes/ClassificationModule.md | 90 +++++++- .../classes/ExecutorchModule.md | 90 +++++++- .../classes/ImageEmbeddingsModule.md | 90 +++++++- .../classes/ImageSegmentationModule.md | 92 +++++++- .../classes/ObjectDetectionModule.md | 204 +++++++++++++++--- .../classes/StyleTransferModule.md | 90 +++++++- .../classes/TextEmbeddingsModule.md | 90 +++++++- .../classes/TextToImageModule.md | 90 +++++++- .../06-api-reference/classes/VADModule.md | 90 +++++++- .../enumerations/RnExecutorchErrorCode.md | 70 +++--- docs/docs/06-api-reference/index.md | 2 + .../docs/06-api-reference/interfaces/Frame.md | 36 ++++ .../interfaces/ObjectDetectionType.md | 80 ++++++- .../06-api-reference/interfaces/PixelData.md | 65 ++++++ .../docs/06-api-reference/typedoc-sidebar.cjs | 2 +- 15 files changed, 1070 insertions(+), 111 deletions(-) create mode 100644 docs/docs/06-api-reference/interfaces/Frame.md create mode 100644 docs/docs/06-api-reference/interfaces/PixelData.md diff --git a/docs/docs/06-api-reference/classes/ClassificationModule.md b/docs/docs/06-api-reference/classes/ClassificationModule.md index f39a1ae9e..066dd9a45 100644 --- a/docs/docs/06-api-reference/classes/ClassificationModule.md +++ b/docs/docs/06-api-reference/classes/ClassificationModule.md @@ -24,13 +24,87 @@ Module for image classification tasks. 
## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) 
+Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The classification result. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. 
> **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ExecutorchModule.md b/docs/docs/06-api-reference/classes/ExecutorchModule.md index 992deeaee..7935e39cf 100644 --- a/docs/docs/06-api-reference/classes/ExecutorchModule.md +++ b/docs/docs/06-api-reference/classes/ExecutorchModule.md @@ -24,13 +24,87 @@ General module for executing custom Executorch models. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -85,7 +161,9 @@ An array of output tensor pointers. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -114,7 +192,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md index 68595c61c..ba6016f47 100644 --- a/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/ImageEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating image embeddings from input images. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the image embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md index b395640ac..6b4128906 100644 --- a/docs/docs/06-api-reference/classes/ImageSegmentationModule.md +++ b/docs/docs/06-api-reference/classes/ImageSegmentationModule.md @@ -21,13 +21,87 @@ or a custom [LabelEnum](../type-aliases/LabelEnum.md) label map. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -39,9 +113,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -103,7 +179,9 @@ If the model is not loaded. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -132,7 +210,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -168,6 +246,8 @@ The input shape as an array of numbers. Defined in: [modules/computer_vision/ImageSegmentationModule.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L76) +Load the model and prepare it for inference. + #### Returns `Promise`\<`void`\> diff --git a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md index 38fd14f56..f0c61d6a6 100644 --- a/docs/docs/06-api-reference/classes/ObjectDetectionModule.md +++ b/docs/docs/06-api-reference/classes/ObjectDetectionModule.md @@ -6,7 +6,7 @@ Module for object detection tasks. 
## Extends -- `BaseModule` +- `VisionModule`\<[`Detection`](../interfaces/Detection.md)[]\> ## Constructors @@ -20,21 +20,141 @@ Module for object detection tasks. #### Inherited from -`BaseModule.constructor` +`VisionModule.constructor` ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`VisionModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** + +Native module instance (JSI Host Object) -Native module instance +#### Inherited from + +`VisionModule.nativeModule` + +## Accessors + +### runOnFrame + +#### Get Signature + +> **get** **runOnFrame**(): (`frame`, ...`args`) => `TOutput` \| `null` + +Defined in: [modules/computer_vision/VisionModule.ts:61](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/VisionModule.ts#L61) + +Synchronous worklet function for real-time VisionCamera frame processing. + +Only available after the model is loaded. Returns null if not loaded. 
+ +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +##### Example + +```typescript +const model = new ClassificationModule(); +await model.load({ modelSource: MODEL }); + +// Use the functional form of setState to store the worklet β€” passing it +// directly would cause React to invoke it immediately as an updater fn. +const [runOnFrame, setRunOnFrame] = useState(null); +setRunOnFrame(() => model.runOnFrame); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const result = runOnFrame(frame); + frame.dispose(); + }, +}); +``` + +##### Returns + +(`frame`, ...`args`) => `TOutput` \| `null` #### Inherited from -`BaseModule.nativeModule` +`VisionModule.runOnFrame` ## Methods @@ -42,9 +162,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) + +Unloads the model from memory and releases native resources. -Unloads the model from memory. +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -52,38 +174,70 @@ Unloads the model from memory. 
#### Inherited from -`BaseModule.delete` +`VisionModule.delete` --- ### forward() -> **forward**(`imageSource`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> +> **forward**(`input`, `detectionThreshold`): `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> + +Defined in: [modules/computer_vision/ObjectDetectionModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L46) + +Executes the model's forward pass with automatic input type detection. -Defined in: [modules/computer_vision/ObjectDetectionModule.ts:54](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts#L54) +Supports two input types: -Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. -`detectionThreshold` can be supplied to alter the sensitivity of the detection. +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. +This method is async and cannot be called in worklet context. #### Parameters -##### imageSource +##### input -`string` +Image source (string path or PixelData object) -The image source to be processed. +`string` | [`PixelData`](../interfaces/PixelData.md) ##### detectionThreshold -`number` = `0.7` - -The threshold for detection sensitivity. Default is 0.7. +`number` = `0.5` #### Returns `Promise`\<[`Detection`](../interfaces/Detection.md)[]\> -An array of Detection objects representing detected items in the image. +A Promise that resolves to the model output. 
+ +#### Example + +```typescript +// String path (async) +const result1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data (async) +const result2 = await model.forward({ + dataPtr: new Uint8Array(pixelBuffer), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); + +// For VisionCamera frames, use runOnFrame in worklet: +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!model.runOnFrame) return; + const result = model.runOnFrame(frame); + }, +}); +``` + +#### Overrides + +`VisionModule.forward` --- @@ -91,7 +245,9 @@ An array of Detection objects representing detected items in the image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -112,7 +268,7 @@ Array of output tensors. #### Inherited from -`BaseModule.forwardET` +`VisionModule.forwardET` --- @@ -120,7 +276,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. @@ -146,7 +302,7 @@ The input shape as an array of numbers. 
#### Inherited from -`BaseModule.getInputShape` +`VisionModule.getInputShape` --- @@ -181,4 +337,4 @@ Optional callback to monitor download progress. #### Overrides -`BaseModule.load` +`VisionModule.load` diff --git a/docs/docs/06-api-reference/classes/StyleTransferModule.md b/docs/docs/06-api-reference/classes/StyleTransferModule.md index 1efc27c02..c6923ddf6 100644 --- a/docs/docs/06-api-reference/classes/StyleTransferModule.md +++ b/docs/docs/06-api-reference/classes/StyleTransferModule.md @@ -24,13 +24,87 @@ Module for style transfer tasks. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ The stylized image as a Base64-encoded string. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md index 72053896b..9c7dece38 100644 --- a/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md +++ b/docs/docs/06-api-reference/classes/TextEmbeddingsModule.md @@ -24,13 +24,87 @@ Module for generating text embeddings from input text. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A Float32Array containing the vector embeddings. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/classes/TextToImageModule.md b/docs/docs/06-api-reference/classes/TextToImageModule.md index 2450c09c3..63bc34ae9 100644 --- a/docs/docs/06-api-reference/classes/TextToImageModule.md +++ b/docs/docs/06-api-reference/classes/TextToImageModule.md @@ -36,13 +36,87 @@ Optional callback function that receives the current step index during inference ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. + +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). 
+ +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -54,9 +128,11 @@ Native module instance > **delete**(): `void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. 
+ +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -115,7 +191,9 @@ A Base64-encoded string representing the generated PNG image. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -144,7 +222,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. diff --git a/docs/docs/06-api-reference/classes/VADModule.md b/docs/docs/06-api-reference/classes/VADModule.md index f37c5239e..996d69832 100644 --- a/docs/docs/06-api-reference/classes/VADModule.md +++ b/docs/docs/06-api-reference/classes/VADModule.md @@ -24,13 +24,87 @@ Module for Voice Activity Detection (VAD) functionalities. ## Properties +### generateFromFrame() + +> **generateFromFrame**: (`frameData`, ...`args`) => `any` + +Defined in: [modules/BaseModule.ts:56](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L56) + +Process a camera frame directly for real-time inference. 
+ +This method is bound to a native JSI function after calling `load()`, +making it worklet-compatible and safe to call from VisionCamera's +frame processor thread. + +**Performance characteristics:** + +- **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5, + frame data is accessed directly without copying (fastest, recommended). +- **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied + from native to JS, then accessed from native code (slower, fallback). + +**Usage with VisionCamera:** + +```typescript +const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + // Zero-copy approach (recommended) + const nativeBuffer = frame.getNativeBuffer(); + const result = model.generateFromFrame( + { + nativeBuffer: nativeBuffer.pointer, + width: frame.width, + height: frame.height, + }, + ...args + ); + nativeBuffer.release(); + frame.dispose(); + }, +}); +``` + +#### Parameters + +##### frameData + +[`Frame`](../interfaces/Frame.md) + +Frame data object with either nativeBuffer (zero-copy) or data (ArrayBuffer) + +##### args + +...`any`[] + +Additional model-specific arguments (e.g., threshold, options) + +#### Returns + +`any` + +Model-specific output (e.g., detections, classifications, embeddings) + +#### See + +[Frame](../interfaces/Frame.md) for frame data format details + +#### Inherited from + +`BaseModule.generateFromFrame` + +--- + ### nativeModule > **nativeModule**: `any` = `null` -Defined in: [modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L8) +Defined in: [modules/BaseModule.ts:17](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L17) + +**`Internal`** -Native module instance +Native module instance (JSI Host Object) #### Inherited from @@ -42,9 +116,11 @@ Native module instance > **delete**(): 
`void` -Defined in: [modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L41) +Defined in: [modules/BaseModule.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L100) -Unloads the model from memory. +Unloads the model from memory and releases native resources. + +Always call this method when you're done with a model to prevent memory leaks. #### Returns @@ -84,7 +160,9 @@ A promise resolving to an array of detected speech segments. > `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\> -Defined in: [modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L23) +Defined in: [modules/BaseModule.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L80) + +**`Internal`** Runs the model's forward method with the given input tensors. It returns the output tensors that mimic the structure of output from ExecuTorch. @@ -113,7 +191,7 @@ Array of output tensors. > **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\> -Defined in: [modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L34) +Defined in: [modules/BaseModule.ts:91](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/modules/BaseModule.ts#L91) Gets the input shape for a given method and index. 
diff --git a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md index c5cdde479..8af6a41a0 100644 --- a/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md +++ b/docs/docs/06-api-reference/enumerations/RnExecutorchErrorCode.md @@ -8,7 +8,7 @@ Defined in: [errors/ErrorCodes.ts:4](https://github.com/software-mansion/react-n > **AccessFailed**: `34` -Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) +Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) Could not access a resource. @@ -18,7 +18,7 @@ Could not access a resource. > **DelegateInvalidCompatibility**: `48` -Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) +Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) Init stage: Backend receives an incompatible delegate version. @@ -28,7 +28,7 @@ Init stage: Backend receives an incompatible delegate version. > **DelegateInvalidHandle**: `50` -Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) +Defined in: [errors/ErrorCodes.ts:184](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L184) Execute stage: The handle is invalid. @@ -38,7 +38,7 @@ Execute stage: The handle is invalid. 
> **DelegateMemoryAllocationFailed**: `49` -Defined in: [errors/ErrorCodes.ts:176](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L176) +Defined in: [errors/ErrorCodes.ts:180](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L180) Init stage: Backend fails to allocate memory. @@ -58,7 +58,7 @@ Thrown when the number of downloaded files is unexpected, due to download interr ### EndOfMethod > **EndOfMethod**: `3` -Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) +Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) Status indicating there are no more steps of execution to run @@ -88,7 +88,7 @@ An error occurred when saving a file. This could be, for instance, a result image ### Internal > **Internal**: `1` -Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) +Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) An internal error occurred. @@ -98,7 +98,7 @@ An internal error occurred. ### InvalidArgument > **InvalidArgument**: `18` -Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) +Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) User provided an invalid argument. 
@@ -118,7 +118,7 @@ Thrown when config parameters passed to a model are invalid. For example, when L > **InvalidExternalData**: `36` -Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) +Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) Error caused by the contents of external data. @@ -148,7 +148,7 @@ Thrown when the type of model source passed by the user is invalid. > **InvalidProgram**: `35` -Defined in: [errors/ErrorCodes.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L160) +Defined in: [errors/ErrorCodes.ts:164](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L164) Error caused by the contents of a program. @@ -158,7 +158,7 @@ Error caused by the contents of a program. > **InvalidState**: `2` -Defined in: [errors/ErrorCodes.ts:120](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L120) +Defined in: [errors/ErrorCodes.ts:124](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L124) Status indicating the executor is in an invalid state for a targeted operation. @@ -168,7 +168,7 @@ Status indicating the executor is in an invalid state for a targeted operation. 
> **InvalidType**: `19` -Defined in: [errors/ErrorCodes.ts:140](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L140) +Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) Object is an invalid type for the operation. @@ -198,7 +198,7 @@ Thrown when a language is passed to a multi-language model that is not supported > **MemoryAllocationFailed**: `33` -Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) +Defined in: [errors/ErrorCodes.ts:156](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L156) Could not allocate the requested memory. @@ -208,7 +208,7 @@ Could not allocate the requested memory. > **MissingDataChunk**: `161` -Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) +Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) Thrown when streaming transcription is attempted but audio data chunk is missing. 
@@ -238,7 +238,7 @@ Thrown when a user tries to run a model that is not yet downloaded or loaded int > **MultilingualConfiguration**: `160` -Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) +Defined in: [errors/ErrorCodes.ts:72](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L72) Thrown when there's a configuration mismatch between multilingual and language settings in Speech-to-Text models. @@ -248,7 +248,7 @@ Thrown when there's a configuration mismatch between multilingual and language s > **NotFound**: `32` -Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) +Defined in: [errors/ErrorCodes.ts:152](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L152) Requested resource could not be found. @@ -258,7 +258,7 @@ Requested resource could not be found. > **NotImplemented**: `17` -Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) +Defined in: [errors/ErrorCodes.ts:136](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L136) Operation is not yet implemented. @@ -268,7 +268,7 @@ Operation is not yet implemented. 
> **NotSupported**: `16` -Defined in: [errors/ErrorCodes.ts:128](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L128) +Defined in: [errors/ErrorCodes.ts:132](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L132) Operation is not supported in the current context. @@ -278,7 +278,7 @@ Operation is not supported in the current context. > **Ok**: `0` -Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) +Defined in: [errors/ErrorCodes.ts:116](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L116) Status indicating a successful operation. @@ -288,7 +288,7 @@ Status indicating a successful operation. > **OperatorMissing**: `20` -Defined in: [errors/ErrorCodes.ts:144](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L144) +Defined in: [errors/ErrorCodes.ts:148](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L148) Operator(s) missing in the operator registry. @@ -298,17 +298,27 @@ Operator(s) missing in the operator registry. > **OutOfResources**: `37` -Defined in: [errors/ErrorCodes.ts:168](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L168) +Defined in: [errors/ErrorCodes.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L172) Does not have enough resources to perform the requested operation. 
--- +### PlatformNotSupported + +> **PlatformNotSupported**: `119` + +Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) + +Thrown when a feature or platform is not supported in the current environment. + +--- + ### ResourceFetcherAdapterNotInitialized > **ResourceFetcherAdapterNotInitialized**: `186` -Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) +Defined in: [errors/ErrorCodes.ts:112](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L112) Thrown when trying to load resources without fetcher initialization. @@ -318,7 +328,7 @@ Thrown when trying to load resources without fetcher initialization. > **ResourceFetcherAlreadyOngoing**: `183` -Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) +Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) Thrown when trying to resume a download that is already ongoing. @@ -328,7 +338,7 @@ Thrown when trying to resume a download that is already ongoing. > **ResourceFetcherAlreadyPaused**: `182` -Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) +Defined in: [errors/ErrorCodes.ts:96](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L96) Thrown when trying to pause a download that is already paused. @@ -338,7 +348,7 @@ Thrown when trying to pause a download that is already paused. 
> **ResourceFetcherDownloadFailed**: `180` -Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) +Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) Thrown when a resource fails to download. This could be due to invalid URL, or for example a network problem. @@ -348,7 +358,7 @@ Thrown when a resource fails to download. This could be due to invalid URL, or f > **ResourceFetcherDownloadInProgress**: `181` -Defined in: [errors/ErrorCodes.ts:88](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L88) +Defined in: [errors/ErrorCodes.ts:92](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L92) Thrown when a user tries to trigger a download that's already in progress. @@ -358,7 +368,7 @@ Thrown when a user tries to trigger a download that's already in progress. > **ResourceFetcherMissingUri**: `185` -Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) +Defined in: [errors/ErrorCodes.ts:108](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L108) Thrown when required URI information is missing for a download operation. @@ -368,7 +378,7 @@ Thrown when required URI information is missing for a download operation. 
> **ResourceFetcherNotActive**: `184` -Defined in: [errors/ErrorCodes.ts:100](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L100) +Defined in: [errors/ErrorCodes.ts:104](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L104) Thrown when trying to pause, resume, or cancel a download that is not active. @@ -378,7 +388,7 @@ Thrown when trying to pause, resume, or cancel a download that is not active. > **StreamingInProgress**: `163` -Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) +Defined in: [errors/ErrorCodes.ts:84](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L84) Thrown when trying to start a new streaming session while another is already in progress. @@ -388,7 +398,7 @@ Thrown when trying to start a new streaming session while another is already in > **StreamingNotStarted**: `162` -Defined in: [errors/ErrorCodes.ts:76](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L76) +Defined in: [errors/ErrorCodes.ts:80](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L80) Thrown when trying to stop or insert data into a stream that hasn't been started. @@ -408,7 +418,7 @@ Thrown when React Native ExecuTorch threadpool problem occurs. 
> **TokenizerError**: `167` -Defined in: [errors/ErrorCodes.ts:64](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L64) +Defined in: [errors/ErrorCodes.ts:68](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/errors/ErrorCodes.ts#L68) Thrown when an error occurs with the tokenizer or tokenization process. diff --git a/docs/docs/06-api-reference/index.md b/docs/docs/06-api-reference/index.md index 125046b67..f49c25e9d 100644 --- a/docs/docs/06-api-reference/index.md +++ b/docs/docs/06-api-reference/index.md @@ -186,6 +186,7 @@ - [RnExecutorchErrorCode](enumerations/RnExecutorchErrorCode.md) - [Logger](classes/Logger.md) - [RnExecutorchError](classes/RnExecutorchError.md) +- [Frame](interfaces/Frame.md) ## TTS Supported Voices @@ -232,6 +233,7 @@ - [OCRDetection](interfaces/OCRDetection.md) - [OCRProps](interfaces/OCRProps.md) - [OCRType](interfaces/OCRType.md) +- [PixelData](interfaces/PixelData.md) - [Point](interfaces/Point.md) - [Segment](interfaces/Segment.md) - [SpeechToTextModelConfig](interfaces/SpeechToTextModelConfig.md) diff --git a/docs/docs/06-api-reference/interfaces/Frame.md b/docs/docs/06-api-reference/interfaces/Frame.md new file mode 100644 index 000000000..149a3837f --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/Frame.md @@ -0,0 +1,36 @@ +# Interface: Frame + +Defined in: [types/common.ts:197](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L197) + +Frame data for vision model processing. + +## Methods + +### getNativeBuffer() + +> **getNativeBuffer**(): `object` + +Defined in: [types/common.ts:205](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L205) + +Pointer to native platform buffer (zero-copy, best performance). 
+ +- On iOS: CVPixelBufferRef pointer +- On Android: AHardwareBuffer\* pointer + +Obtain from Vision Camera v5: `frame.getNativeBuffer().pointer` + +#### Returns + +`object` + +##### pointer + +> **pointer**: `bigint` + +##### release() + +> **release**(): `void` + +###### Returns + +`void` diff --git a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md index a9f28e5cf..4bd5dba98 100644 --- a/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md +++ b/docs/docs/06-api-reference/interfaces/ObjectDetectionType.md @@ -29,36 +29,57 @@ Contains the error object if the model failed to load, download, or encountered ### forward() -> **forward**: (`imageSource`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> +> **forward**: (`input`, `detectionThreshold?`) => `Promise`\<[`Detection`](Detection.md)[]\> -Defined in: [types/objectDetection.ts:179](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L179) +Defined in: [types/objectDetection.ts:199](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L199) -Executes the model's forward pass to detect objects within the provided image. +Executes the model's forward pass with automatic input type detection. + +Supports two input types: + +1. **String path/URI**: File path, URL, or Base64-encoded string +2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + +**Note**: For VisionCamera frame processing, use `runOnFrame` instead. #### Parameters -##### imageSource +##### input -`string` +Image source (string or PixelData object) -A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. +`string` | [`PixelData`](PixelData.md) ##### detectionThreshold? 
`number` -An optional number between 0 and 1 representing the minimum confidence score required for an object to be included in the results. Default is 0.7. +An optional number between 0 and 1 representing the minimum confidence score. Default is 0.5. #### Returns `Promise`\<[`Detection`](Detection.md)[]\> -A Promise that resolves to an array of `Detection` objects, where each object typically contains bounding box coordinates, a class label, and a confidence score. +A Promise that resolves to an array of `Detection` objects. #### Throws If the model is not loaded or is currently processing another image. +#### Example + +```typescript +// String path +const detections1 = await model.forward('file:///path/to/image.jpg'); + +// Pixel data +const detections2 = await model.forward({ + dataPtr: new Uint8Array(rgbPixels), + sizes: [480, 640, 3], + scalarType: ScalarType.BYTE, +}); +``` + --- ### isGenerating @@ -78,3 +99,46 @@ Indicates whether the model is currently processing an image. Defined in: [types/objectDetection.ts:160](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L160) Indicates whether the object detection model is loaded and ready to process images. + +--- + +### runOnFrame + +> **runOnFrame**: (`frame`, `detectionThreshold`) => [`Detection`](Detection.md)[] \| `null` + +Defined in: [types/objectDetection.ts:231](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/objectDetection.ts#L231) + +Synchronous worklet function for real-time VisionCamera frame processing. +Automatically handles native buffer extraction and cleanup. + +**Use this for VisionCamera frame processing in worklets.** +For async processing, use `forward()` instead. + +Available after model is loaded (`isReady: true`). 
+ +#### Example + +```typescript +const { runOnFrame, isReady } = useObjectDetection({ model: MODEL }); + +const frameOutput = useFrameOutput({ + onFrame(frame) { + 'worklet'; + if (!runOnFrame) return; + const detections = runOnFrame(frame, 0.5); + frame.dispose(); + }, +}); +``` + +#### Param + +VisionCamera Frame object + +#### Param + +The threshold for detection sensitivity. + +#### Returns + +Array of Detection objects representing detected items in the frame. diff --git a/docs/docs/06-api-reference/interfaces/PixelData.md b/docs/docs/06-api-reference/interfaces/PixelData.md new file mode 100644 index 000000000..7ef9865aa --- /dev/null +++ b/docs/docs/06-api-reference/interfaces/PixelData.md @@ -0,0 +1,65 @@ +# Interface: PixelData + +Defined in: [types/common.ts:172](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L172) + +Represents raw pixel data in RGB format for vision models. + +This type extends TensorPtr with constraints specific to image data: + +- dataPtr must be Uint8Array (8-bit unsigned integers) +- scalarType is always BYTE (ScalarType.BYTE) +- sizes represents [height, width, channels] where channels must be 3 (RGB) + +## Example + +```typescript +const pixelData: PixelData = { + dataPtr: new Uint8Array(width * height * 3), // RGB pixel data + sizes: [height, width, 3], // [height, width, channels] + scalarType: ScalarType.BYTE, +}; +``` + +## Extends + +- `Omit`\<[`TensorPtr`](TensorPtr.md), `"dataPtr"` \| `"scalarType"`\> + +## Properties + +### dataPtr + +> **dataPtr**: `Uint8Array` + +Defined in: [types/common.ts:178](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L178) + +RGB pixel data as Uint8Array. +Expected format: RGB (3 channels), not RGBA or BGRA. 
+
Size must equal: width \* height \* 3

---

### scalarType

> **scalarType**: [`BYTE`](../enumerations/ScalarType.md#byte)

Defined in: [types/common.ts:191](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L191)

Scalar type is always BYTE for pixel data.

---

### sizes

> **sizes**: \[`number`, `number`, `3`\]

Defined in: [types/common.ts:186](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/src/types/common.ts#L186)

Dimensions of the pixel data: [height, width, channels].

- sizes[0]: height (number of rows)
- sizes[1]: width (number of columns)
- sizes[2]: channels (must be 3 for RGB)

#### Overrides

[`TensorPtr`](TensorPtr.md).[`sizes`](TensorPtr.md#sizes)
diff --git a/docs/docs/06-api-reference/typedoc-sidebar.cjs b/docs/docs/06-api-reference/typedoc-sidebar.cjs
index bbd478710..f5d9ec3d4 100644
--- a/docs/docs/06-api-reference/typedoc-sidebar.cjs
+++ b/docs/docs/06-api-reference/typedoc-sidebar.cjs
@@ -1,4 +1,4 @@
 // @ts-check
 /** @type {import("@docusaurus/plugin-content-docs").SidebarsConfig} */
-const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"SpeechToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProp
s",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-reference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-
reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; +const typedocSidebar = 
{items:[{type:"category",label:"Hooks",items:[{type:"doc",id:"06-api-reference/functions/useClassification",label:"useClassification"},{type:"doc",id:"06-api-reference/functions/useExecutorchModule",label:"useExecutorchModule"},{type:"doc",id:"06-api-reference/functions/useImageEmbeddings",label:"useImageEmbeddings"},{type:"doc",id:"06-api-reference/functions/useImageSegmentation",label:"useImageSegmentation"},{type:"doc",id:"06-api-reference/functions/useLLM",label:"useLLM"},{type:"doc",id:"06-api-reference/functions/useObjectDetection",label:"useObjectDetection"},{type:"doc",id:"06-api-reference/functions/useOCR",label:"useOCR"},{type:"doc",id:"06-api-reference/functions/useSpeechToText",label:"useSpeechToText"},{type:"doc",id:"06-api-reference/functions/useStyleTransfer",label:"useStyleTransfer"},{type:"doc",id:"06-api-reference/functions/useTextEmbeddings",label:"useTextEmbeddings"},{type:"doc",id:"06-api-reference/functions/useTextToImage",label:"useTextToImage"},{type:"doc",id:"06-api-reference/functions/useTextToSpeech",label:"useTextToSpeech"},{type:"doc",id:"06-api-reference/functions/useTokenizer",label:"useTokenizer"},{type:"doc",id:"06-api-reference/functions/useVAD",label:"useVAD"},{type:"doc",id:"06-api-reference/functions/useVerticalOCR",label:"useVerticalOCR"}]},{type:"category",label:"Interfaces",items:[{type:"doc",id:"06-api-reference/interfaces/ResourceSourceExtended",label:"ResourceSourceExtended"}]},{type:"category",label:"Models - Classification",items:[{type:"doc",id:"06-api-reference/variables/EFFICIENTNET_V2_S",label:"EFFICIENTNET_V2_S"}]},{type:"category",label:"Models - Image Embeddings",items:[{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_IMAGE",label:"CLIP_VIT_BASE_PATCH32_IMAGE"}]},{type:"category",label:"Models - Image 
Generation",items:[{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_256",label:"BK_SDM_TINY_VPRED_256"},{type:"doc",id:"06-api-reference/variables/BK_SDM_TINY_VPRED_512",label:"BK_SDM_TINY_VPRED_512"}]},{type:"category",label:"Models - Image Segmentation",items:[{type:"doc",id:"06-api-reference/variables/DEEPLAB_V3_RESNET50",label:"DEEPLAB_V3_RESNET50"},{type:"doc",id:"06-api-reference/variables/SELFIE_SEGMENTATION",label:"SELFIE_SEGMENTATION"}]},{type:"category",label:"Models - LMM",items:[{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B",label:"HAMMER2_1_0_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_0_5B_QUANTIZED",label:"HAMMER2_1_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B",label:"HAMMER2_1_1_5B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_1_5B_QUANTIZED",label:"HAMMER2_1_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B",label:"HAMMER2_1_3B"},{type:"doc",id:"06-api-reference/variables/HAMMER2_1_3B_QUANTIZED",label:"HAMMER2_1_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT",label:"LFM2_5_1_2B_INSTRUCT"},{type:"doc",id:"06-api-reference/variables/LFM2_5_1_2B_INSTRUCT_QUANTIZED",label:"LFM2_5_1_2B_INSTRUCT_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B",label:"LLAMA3_2_1B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_QLORA",label:"LLAMA3_2_1B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_1B_SPINQUANT",label:"LLAMA3_2_1B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B",label:"LLAMA3_2_3B"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_QLORA",label:"LLAMA3_2_3B_QLORA"},{type:"doc",id:"06-api-reference/variables/LLAMA3_2_3B_SPINQUANT",label:"LLAMA3_2_3B_SPINQUANT"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B",label:"PHI_4_MINI_4B"},{type:"doc",id:"06-api-reference/variables/PHI_4_MINI_4B_QUANTIZED",label:"PHI_4_MINI_4B_QUANTIZED"},{type:"doc",id:"06-api-r
eference/variables/QWEN2_5_0_5B",label:"QWEN2_5_0_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_0_5B_QUANTIZED",label:"QWEN2_5_0_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B",label:"QWEN2_5_1_5B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_1_5B_QUANTIZED",label:"QWEN2_5_1_5B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B",label:"QWEN2_5_3B"},{type:"doc",id:"06-api-reference/variables/QWEN2_5_3B_QUANTIZED",label:"QWEN2_5_3B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B",label:"QWEN3_0_6B"},{type:"doc",id:"06-api-reference/variables/QWEN3_0_6B_QUANTIZED",label:"QWEN3_0_6B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B",label:"QWEN3_1_7B"},{type:"doc",id:"06-api-reference/variables/QWEN3_1_7B_QUANTIZED",label:"QWEN3_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B",label:"QWEN3_4B"},{type:"doc",id:"06-api-reference/variables/QWEN3_4B_QUANTIZED",label:"QWEN3_4B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B",label:"SMOLLM2_1_1_7B"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_1_7B_QUANTIZED",label:"SMOLLM2_1_1_7B_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M",label:"SMOLLM2_1_135M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_135M_QUANTIZED",label:"SMOLLM2_1_135M_QUANTIZED"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M",label:"SMOLLM2_1_360M"},{type:"doc",id:"06-api-reference/variables/SMOLLM2_1_360M_QUANTIZED",label:"SMOLLM2_1_360M_QUANTIZED"}]},{type:"category",label:"Models - Object Detection",items:[{type:"doc",id:"06-api-reference/variables/SSDLITE_320_MOBILENET_V3_LARGE",label:"SSDLITE_320_MOBILENET_V3_LARGE"}]},{type:"category",label:"Models - Speech To 
Text",items:[{type:"doc",id:"06-api-reference/variables/WHISPER_BASE",label:"WHISPER_BASE"},{type:"doc",id:"06-api-reference/variables/WHISPER_BASE_EN",label:"WHISPER_BASE_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL",label:"WHISPER_SMALL"},{type:"doc",id:"06-api-reference/variables/WHISPER_SMALL_EN",label:"WHISPER_SMALL_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY",label:"WHISPER_TINY"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN",label:"WHISPER_TINY_EN"},{type:"doc",id:"06-api-reference/variables/WHISPER_TINY_EN_QUANTIZED",label:"WHISPER_TINY_EN_QUANTIZED"}]},{type:"category",label:"Models - Style Transfer",items:[{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_CANDY",label:"STYLE_TRANSFER_CANDY"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_MOSAIC",label:"STYLE_TRANSFER_MOSAIC"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_RAIN_PRINCESS",label:"STYLE_TRANSFER_RAIN_PRINCESS"},{type:"doc",id:"06-api-reference/variables/STYLE_TRANSFER_UDNIE",label:"STYLE_TRANSFER_UDNIE"}]},{type:"category",label:"Models - Text Embeddings",items:[{type:"doc",id:"06-api-reference/variables/ALL_MINILM_L6_V2",label:"ALL_MINILM_L6_V2"},{type:"doc",id:"06-api-reference/variables/ALL_MPNET_BASE_V2",label:"ALL_MPNET_BASE_V2"},{type:"doc",id:"06-api-reference/variables/CLIP_VIT_BASE_PATCH32_TEXT",label:"CLIP_VIT_BASE_PATCH32_TEXT"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MINILM_L6_COS_V1",label:"MULTI_QA_MINILM_L6_COS_V1"},{type:"doc",id:"06-api-reference/variables/MULTI_QA_MPNET_BASE_DOT_V1",label:"MULTI_QA_MPNET_BASE_DOT_V1"}]},{type:"category",label:"Models - Text to Speech",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_MEDIUM",label:"KOKORO_MEDIUM"},{type:"doc",id:"06-api-reference/variables/KOKORO_SMALL",label:"KOKORO_SMALL"}]},{type:"category",label:"Models - Voice Activity 
Detection",items:[{type:"doc",id:"06-api-reference/variables/FSMN_VAD",label:"FSMN_VAD"}]},{type:"category",label:"OCR Supported Alphabets",items:[{type:"doc",id:"06-api-reference/variables/OCR_ABAZA",label:"OCR_ABAZA"},{type:"doc",id:"06-api-reference/variables/OCR_ADYGHE",label:"OCR_ADYGHE"},{type:"doc",id:"06-api-reference/variables/OCR_AFRIKAANS",label:"OCR_AFRIKAANS"},{type:"doc",id:"06-api-reference/variables/OCR_ALBANIAN",label:"OCR_ALBANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_AVAR",label:"OCR_AVAR"},{type:"doc",id:"06-api-reference/variables/OCR_AZERBAIJANI",label:"OCR_AZERBAIJANI"},{type:"doc",id:"06-api-reference/variables/OCR_BELARUSIAN",label:"OCR_BELARUSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BOSNIAN",label:"OCR_BOSNIAN"},{type:"doc",id:"06-api-reference/variables/OCR_BULGARIAN",label:"OCR_BULGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CHECHEN",label:"OCR_CHECHEN"},{type:"doc",id:"06-api-reference/variables/OCR_CROATIAN",label:"OCR_CROATIAN"},{type:"doc",id:"06-api-reference/variables/OCR_CZECH",label:"OCR_CZECH"},{type:"doc",id:"06-api-reference/variables/OCR_DANISH",label:"OCR_DANISH"},{type:"doc",id:"06-api-reference/variables/OCR_DARGWA",label:"OCR_DARGWA"},{type:"doc",id:"06-api-reference/variables/OCR_DUTCH",label:"OCR_DUTCH"},{type:"doc",id:"06-api-reference/variables/OCR_ENGLISH",label:"OCR_ENGLISH"},{type:"doc",id:"06-api-reference/variables/OCR_ESTONIAN",label:"OCR_ESTONIAN"},{type:"doc",id:"06-api-reference/variables/OCR_FRENCH",label:"OCR_FRENCH"},{type:"doc",id:"06-api-reference/variables/OCR_GERMAN",label:"OCR_GERMAN"},{type:"doc",id:"06-api-reference/variables/OCR_HUNGARIAN",label:"OCR_HUNGARIAN"},{type:"doc",id:"06-api-reference/variables/OCR_ICELANDIC",label:"OCR_ICELANDIC"},{type:"doc",id:"06-api-reference/variables/OCR_INDONESIAN",label:"OCR_INDONESIAN"},{type:"doc",id:"06-api-reference/variables/OCR_INGUSH",label:"OCR_INGUSH"},{type:"doc",id:"06-api-reference/variables/OCR_IRISH",label:"OCR_IRISH
"},{type:"doc",id:"06-api-reference/variables/OCR_ITALIAN",label:"OCR_ITALIAN"},{type:"doc",id:"06-api-reference/variables/OCR_JAPANESE",label:"OCR_JAPANESE"},{type:"doc",id:"06-api-reference/variables/OCR_KANNADA",label:"OCR_KANNADA"},{type:"doc",id:"06-api-reference/variables/OCR_KARBADIAN",label:"OCR_KARBADIAN"},{type:"doc",id:"06-api-reference/variables/OCR_KOREAN",label:"OCR_KOREAN"},{type:"doc",id:"06-api-reference/variables/OCR_KURDISH",label:"OCR_KURDISH"},{type:"doc",id:"06-api-reference/variables/OCR_LAK",label:"OCR_LAK"},{type:"doc",id:"06-api-reference/variables/OCR_LATIN",label:"OCR_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_LATVIAN",label:"OCR_LATVIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LEZGHIAN",label:"OCR_LEZGHIAN"},{type:"doc",id:"06-api-reference/variables/OCR_LITHUANIAN",label:"OCR_LITHUANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_MALAY",label:"OCR_MALAY"},{type:"doc",id:"06-api-reference/variables/OCR_MALTESE",label:"OCR_MALTESE"},{type:"doc",id:"06-api-reference/variables/OCR_MAORI",label:"OCR_MAORI"},{type:"doc",id:"06-api-reference/variables/OCR_MONGOLIAN",label:"OCR_MONGOLIAN"},{type:"doc",id:"06-api-reference/variables/OCR_NORWEGIAN",label:"OCR_NORWEGIAN"},{type:"doc",id:"06-api-reference/variables/OCR_OCCITAN",label:"OCR_OCCITAN"},{type:"doc",id:"06-api-reference/variables/OCR_PALI",label:"OCR_PALI"},{type:"doc",id:"06-api-reference/variables/OCR_POLISH",label:"OCR_POLISH"},{type:"doc",id:"06-api-reference/variables/OCR_PORTUGUESE",label:"OCR_PORTUGUESE"},{type:"doc",id:"06-api-reference/variables/OCR_ROMANIAN",label:"OCR_ROMANIAN"},{type:"doc",id:"06-api-reference/variables/OCR_RUSSIAN",label:"OCR_RUSSIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_CYRILLIC",label:"OCR_SERBIAN_CYRILLIC"},{type:"doc",id:"06-api-reference/variables/OCR_SERBIAN_LATIN",label:"OCR_SERBIAN_LATIN"},{type:"doc",id:"06-api-reference/variables/OCR_SIMPLIFIED_CHINESE",label:"OCR_SIMPLIFIED_CHINESE"},{type:"doc",id:"06-a
pi-reference/variables/OCR_SLOVAK",label:"OCR_SLOVAK"},{type:"doc",id:"06-api-reference/variables/OCR_SLOVENIAN",label:"OCR_SLOVENIAN"},{type:"doc",id:"06-api-reference/variables/OCR_SPANISH",label:"OCR_SPANISH"},{type:"doc",id:"06-api-reference/variables/OCR_SWAHILI",label:"OCR_SWAHILI"},{type:"doc",id:"06-api-reference/variables/OCR_SWEDISH",label:"OCR_SWEDISH"},{type:"doc",id:"06-api-reference/variables/OCR_TABASSARAN",label:"OCR_TABASSARAN"},{type:"doc",id:"06-api-reference/variables/OCR_TAGALOG",label:"OCR_TAGALOG"},{type:"doc",id:"06-api-reference/variables/OCR_TAJIK",label:"OCR_TAJIK"},{type:"doc",id:"06-api-reference/variables/OCR_TELUGU",label:"OCR_TELUGU"},{type:"doc",id:"06-api-reference/variables/OCR_TURKISH",label:"OCR_TURKISH"},{type:"doc",id:"06-api-reference/variables/OCR_UKRAINIAN",label:"OCR_UKRAINIAN"},{type:"doc",id:"06-api-reference/variables/OCR_UZBEK",label:"OCR_UZBEK"},{type:"doc",id:"06-api-reference/variables/OCR_VIETNAMESE",label:"OCR_VIETNAMESE"},{type:"doc",id:"06-api-reference/variables/OCR_WELSH",label:"OCR_WELSH"}]},{type:"category",label:"Other",items:[{type:"doc",id:"06-api-reference/enumerations/RnExecutorchErrorCode",label:"RnExecutorchErrorCode"},{type:"doc",id:"06-api-reference/classes/Logger",label:"Logger"},{type:"doc",id:"06-api-reference/classes/RnExecutorchError",label:"RnExecutorchError"},{type:"doc",id:"06-api-reference/interfaces/Frame",label:"Frame"}]},{type:"category",label:"TTS Supported 
Voices",items:[{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_HEART",label:"KOKORO_VOICE_AF_HEART"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_RIVER",label:"KOKORO_VOICE_AF_RIVER"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AF_SARAH",label:"KOKORO_VOICE_AF_SARAH"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_ADAM",label:"KOKORO_VOICE_AM_ADAM"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_MICHAEL",label:"KOKORO_VOICE_AM_MICHAEL"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_AM_SANTA",label:"KOKORO_VOICE_AM_SANTA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BF_EMMA",label:"KOKORO_VOICE_BF_EMMA"},{type:"doc",id:"06-api-reference/variables/KOKORO_VOICE_BM_DANIEL",label:"KOKORO_VOICE_BM_DANIEL"}]},{type:"category",label:"Types",items:[{type:"doc",id:"06-api-reference/enumerations/CocoLabel",label:"CocoLabel"},{type:"doc",id:"06-api-reference/enumerations/DeeplabLabel",label:"DeeplabLabel"},{type:"doc",id:"06-api-reference/enumerations/DownloadStatus",label:"DownloadStatus"},{type:"doc",id:"06-api-reference/enumerations/HTTP_CODE",label:"HTTP_CODE"},{type:"doc",id:"06-api-reference/enumerations/ScalarType",label:"ScalarType"},{type:"doc",id:"06-api-reference/enumerations/SelfieSegmentationLabel",label:"SelfieSegmentationLabel"},{type:"doc",id:"06-api-reference/enumerations/SourceType",label:"SourceType"},{type:"doc",id:"06-api-reference/interfaces/Bbox",label:"Bbox"},{type:"doc",id:"06-api-reference/interfaces/ChatConfig",label:"ChatConfig"},{type:"doc",id:"06-api-reference/interfaces/ClassificationProps",label:"ClassificationProps"},{type:"doc",id:"06-api-reference/interfaces/ClassificationType",label:"ClassificationType"},{type:"doc",id:"06-api-reference/interfaces/ContextStrategy",label:"ContextStrategy"},{type:"doc",id:"06-api-reference/interfaces/DecodingOptions",label:"DecodingOptions"},{type:"doc",id:"06-api-reference/interfaces/Detection",label:"Detection"},{type:"doc",id:
"06-api-reference/interfaces/ExecutorchModuleProps",label:"ExecutorchModuleProps"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchModuleType",label:"ExecutorchModuleType"},{type:"doc",id:"06-api-reference/interfaces/GenerationConfig",label:"GenerationConfig"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsProps",label:"ImageEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/ImageEmbeddingsType",label:"ImageEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationProps",label:"ImageSegmentationProps"},{type:"doc",id:"06-api-reference/interfaces/ImageSegmentationType",label:"ImageSegmentationType"},{type:"doc",id:"06-api-reference/interfaces/KokoroConfig",label:"KokoroConfig"},{type:"doc",id:"06-api-reference/interfaces/KokoroVoiceExtras",label:"KokoroVoiceExtras"},{type:"doc",id:"06-api-reference/interfaces/LLMConfig",label:"LLMConfig"},{type:"doc",id:"06-api-reference/interfaces/LLMProps",label:"LLMProps"},{type:"doc",id:"06-api-reference/interfaces/LLMType",label:"LLMType"},{type:"doc",id:"06-api-reference/interfaces/Message",label:"Message"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionProps",label:"ObjectDetectionProps"},{type:"doc",id:"06-api-reference/interfaces/ObjectDetectionType",label:"ObjectDetectionType"},{type:"doc",id:"06-api-reference/interfaces/OCRDetection",label:"OCRDetection"},{type:"doc",id:"06-api-reference/interfaces/OCRProps",label:"OCRProps"},{type:"doc",id:"06-api-reference/interfaces/OCRType",label:"OCRType"},{type:"doc",id:"06-api-reference/interfaces/PixelData",label:"PixelData"},{type:"doc",id:"06-api-reference/interfaces/Point",label:"Point"},{type:"doc",id:"06-api-reference/interfaces/Segment",label:"Segment"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextModelConfig",label:"SpeechToTextModelConfig"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextProps",label:"SpeechToTextProps"},{type:"doc",id:"06-api-reference/interfaces/SpeechToTextType",label:"Speech
ToTextType"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferProps",label:"StyleTransferProps"},{type:"doc",id:"06-api-reference/interfaces/StyleTransferType",label:"StyleTransferType"},{type:"doc",id:"06-api-reference/interfaces/TensorPtr",label:"TensorPtr"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsProps",label:"TextEmbeddingsProps"},{type:"doc",id:"06-api-reference/interfaces/TextEmbeddingsType",label:"TextEmbeddingsType"},{type:"doc",id:"06-api-reference/interfaces/TextToImageProps",label:"TextToImageProps"},{type:"doc",id:"06-api-reference/interfaces/TextToImageType",label:"TextToImageType"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechConfig",label:"TextToSpeechConfig"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechInput",label:"TextToSpeechInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechProps",label:"TextToSpeechProps"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechStreamingInput",label:"TextToSpeechStreamingInput"},{type:"doc",id:"06-api-reference/interfaces/TextToSpeechType",label:"TextToSpeechType"},{type:"doc",id:"06-api-reference/interfaces/TokenizerProps",label:"TokenizerProps"},{type:"doc",id:"06-api-reference/interfaces/TokenizerType",label:"TokenizerType"},{type:"doc",id:"06-api-reference/interfaces/ToolCall",label:"ToolCall"},{type:"doc",id:"06-api-reference/interfaces/ToolsConfig",label:"ToolsConfig"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionResult",label:"TranscriptionResult"},{type:"doc",id:"06-api-reference/interfaces/TranscriptionSegment",label:"TranscriptionSegment"},{type:"doc",id:"06-api-reference/interfaces/VADProps",label:"VADProps"},{type:"doc",id:"06-api-reference/interfaces/VADType",label:"VADType"},{type:"doc",id:"06-api-reference/interfaces/VerticalOCRProps",label:"VerticalOCRProps"},{type:"doc",id:"06-api-reference/interfaces/VoiceConfig",label:"VoiceConfig"},{type:"doc",id:"06-api-reference/interfaces/Word",label:"Word"},{type:"doc",id:"06-api-re
ference/type-aliases/LabelEnum",label:"LabelEnum"},{type:"doc",id:"06-api-reference/type-aliases/LLMTool",label:"LLMTool"},{type:"doc",id:"06-api-reference/type-aliases/MessageRole",label:"MessageRole"},{type:"doc",id:"06-api-reference/type-aliases/ModelNameOf",label:"ModelNameOf"},{type:"doc",id:"06-api-reference/type-aliases/ModelSources",label:"ModelSources"},{type:"doc",id:"06-api-reference/type-aliases/OCRLanguage",label:"OCRLanguage"},{type:"doc",id:"06-api-reference/type-aliases/ResourceSource",label:"ResourceSource"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationConfig",label:"SegmentationConfig"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationLabels",label:"SegmentationLabels"},{type:"doc",id:"06-api-reference/type-aliases/SegmentationModelName",label:"SegmentationModelName"},{type:"doc",id:"06-api-reference/type-aliases/SpeechToTextLanguage",label:"SpeechToTextLanguage"},{type:"doc",id:"06-api-reference/type-aliases/TensorBuffer",label:"TensorBuffer"},{type:"doc",id:"06-api-reference/type-aliases/TextToSpeechLanguage",label:"TextToSpeechLanguage"},{type:"doc",id:"06-api-reference/type-aliases/Triple",label:"Triple"},{type:"doc",id:"06-api-reference/variables/SPECIAL_TOKENS",label:"SPECIAL_TOKENS"}]},{type:"category",label:"Typescript 
API",items:[{type:"doc",id:"06-api-reference/classes/ClassificationModule",label:"ClassificationModule"},{type:"doc",id:"06-api-reference/classes/ExecutorchModule",label:"ExecutorchModule"},{type:"doc",id:"06-api-reference/classes/ImageEmbeddingsModule",label:"ImageEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/ImageSegmentationModule",label:"ImageSegmentationModule"},{type:"doc",id:"06-api-reference/classes/LLMModule",label:"LLMModule"},{type:"doc",id:"06-api-reference/classes/ObjectDetectionModule",label:"ObjectDetectionModule"},{type:"doc",id:"06-api-reference/classes/OCRModule",label:"OCRModule"},{type:"doc",id:"06-api-reference/classes/SpeechToTextModule",label:"SpeechToTextModule"},{type:"doc",id:"06-api-reference/classes/StyleTransferModule",label:"StyleTransferModule"},{type:"doc",id:"06-api-reference/classes/TextEmbeddingsModule",label:"TextEmbeddingsModule"},{type:"doc",id:"06-api-reference/classes/TextToImageModule",label:"TextToImageModule"},{type:"doc",id:"06-api-reference/classes/TextToSpeechModule",label:"TextToSpeechModule"},{type:"doc",id:"06-api-reference/classes/TokenizerModule",label:"TokenizerModule"},{type:"doc",id:"06-api-reference/classes/VADModule",label:"VADModule"},{type:"doc",id:"06-api-reference/classes/VerticalOCRModule",label:"VerticalOCRModule"}]},{type:"category",label:"Utilities - 
General",items:[{type:"category",label:"ResourceFetcherUtils",items:[{type:"category",label:"Functions",items:[{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/calculateDownloadProgress",label:"calculateDownloadProgress"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/getFilenameFromUri",label:"getFilenameFromUri"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/hashObject",label:"hashObject"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/removeFilePrefix",label:"removeFilePrefix"},{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/functions/triggerHuggingFaceDownloadCounter",label:"triggerHuggingFaceDownloadCounter"}]}],link:{type:"doc",id:"06-api-reference/react-native-executorch/namespaces/ResourceFetcherUtils/index"}},{type:"doc",id:"06-api-reference/classes/ResourceFetcher",label:"ResourceFetcher"},{type:"doc",id:"06-api-reference/interfaces/ExecutorchConfig",label:"ExecutorchConfig"},{type:"doc",id:"06-api-reference/interfaces/ResourceFetcherAdapter",label:"ResourceFetcherAdapter"},{type:"doc",id:"06-api-reference/functions/cleanupExecutorch",label:"cleanupExecutorch"},{type:"doc",id:"06-api-reference/functions/initExecutorch",label:"initExecutorch"}]},{type:"category",label:"Utilities - 
LLM",items:[{type:"doc",id:"06-api-reference/variables/DEFAULT_CHAT_CONFIG",label:"DEFAULT_CHAT_CONFIG"},{type:"doc",id:"06-api-reference/variables/DEFAULT_CONTEXT_BUFFER_TOKENS",label:"DEFAULT_CONTEXT_BUFFER_TOKENS"},{type:"doc",id:"06-api-reference/variables/DEFAULT_MESSAGE_HISTORY",label:"DEFAULT_MESSAGE_HISTORY"},{type:"doc",id:"06-api-reference/variables/DEFAULT_SYSTEM_PROMPT",label:"DEFAULT_SYSTEM_PROMPT"},{type:"doc",id:"06-api-reference/variables/parseToolCall",label:"parseToolCall"},{type:"doc",id:"06-api-reference/functions/DEFAULT_STRUCTURED_OUTPUT_PROMPT",label:"DEFAULT_STRUCTURED_OUTPUT_PROMPT"},{type:"doc",id:"06-api-reference/functions/fixAndValidateStructuredOutput",label:"fixAndValidateStructuredOutput"},{type:"doc",id:"06-api-reference/functions/getStructuredOutputPrompt",label:"getStructuredOutputPrompt"}]},{type:"category",label:"Utils",items:[{type:"doc",id:"06-api-reference/classes/MessageCountContextStrategy",label:"MessageCountContextStrategy"},{type:"doc",id:"06-api-reference/classes/NoopContextStrategy",label:"NoopContextStrategy"},{type:"doc",id:"06-api-reference/classes/SlidingWindowContextStrategy",label:"SlidingWindowContextStrategy"}]}]}; module.exports = typedocSidebar.items; \ No newline at end of file From 5ddad2fe1dd20f04eac4854d46c734e437203486 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 12 Feb 2026 14:24:02 +0100 Subject: [PATCH 23/37] feat: frame extractor for zero-copy approach --- .../common/rnexecutorch/utils/FrameExtractor.cpp | 2 +- .../common/rnexecutorch/utils/FrameExtractor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp index baae35dc3..c62d1b21c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.cpp @@ 
-111,4 +111,4 @@ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr) { #endif } -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h index f5d7c2094..dda4ff956 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameExtractor.h @@ -22,4 +22,4 @@ namespace rnexecutorch::utils { */ cv::Mat extractFromNativeBuffer(uint64_t bufferPtr); -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file From 41866637ac0e5be340ee12f7a0285091b45bb543 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 16 Feb 2026 10:37:11 +0100 Subject: [PATCH 24/37] feat: unify frame extraction and preprocessing --- .../common/rnexecutorch/models/VisionModel.cpp | 2 +- .../common/rnexecutorch/models/VisionModel.h | 2 +- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../common/rnexecutorch/utils/FrameProcessor.cpp | 2 +- .../common/rnexecutorch/utils/FrameProcessor.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index b88310e12..c0ce049f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -50,4 +50,4 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { return image; } -} // namespace rnexecutorch::models +} // namespace rnexecutorch::models \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h 
b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h index 82d544db3..e0ec03912 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h @@ -151,4 +151,4 @@ class VisionModel : public BaseModel { REGISTER_CONSTRUCTOR(models::VisionModel, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..b9fad1b88 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification +} // namespace rnexecutorch::models::classification \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp index 30238ad5c..1d03b97ba 100644 --- a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp @@ -25,4 +25,4 @@ cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData) { return extractFromNativeBuffer(bufferPtr); } -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h index 403f4bde9..6bbb3390d 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h +++ b/packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.h @@ -24,4 +24,4 @@ using namespace facebook; */ cv::Mat extractFrame(jsi::Runtime &runtime, const jsi::Object &frameData); -} // namespace rnexecutorch::utils +} // namespace rnexecutorch::utils \ No newline at end of file From 6a89b0899e07989e09a3542f271904d7babf4448 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 13:05:14 +0100 Subject: [PATCH 25/37] feat: initial version of vision model API --- .../app/object_detection/index.tsx | 167 +++++++++++++++++- .../host_objects/ModelHostObject.h | 2 +- .../metaprogramming/TypeConcepts.h | 9 +- .../models/embeddings/image/ImageEmbeddings.h | 2 +- .../BaseImageSegmentation.h | 2 +- .../models/style_transfer/StyleTransfer.h | 2 +- .../computer_vision/ObjectDetectionModule.ts | 165 +++++++++++++---- 7 files changed, 298 insertions(+), 51 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 6a43dd920..9e60589fb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,16 +1,66 @@ import Spinner from '../../components/Spinner'; -import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image } from 'react-native'; +import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; +import ColorPalette from '../../colors'; +import { Images } from 'react-native-nitro-image'; + +// Helper 
function to convert image URI to raw pixel data using NitroImage +async function imageUriToPixelData( + uri: string, + targetWidth: number, + targetHeight: number +): Promise<{ + data: ArrayBuffer; + width: number; + height: number; + channels: number; +}> { + try { + // Load image and resize to target dimensions + const image = await Images.loadFromFileAsync(uri); + const resized = image.resize(targetWidth, targetHeight); + + // Get pixel data as ArrayBuffer (RGBA format) + const pixelData = resized.toRawPixelData(); + const buffer = + pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer; + + // Calculate actual buffer dimensions (accounts for device pixel ratio) + const bufferSize = buffer?.byteLength || 0; + const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel + const aspectRatio = targetWidth / targetHeight; + const actualHeight = Math.sqrt(totalPixels / aspectRatio); + const actualWidth = totalPixels / actualHeight; + + console.log('Requested:', targetWidth, 'x', targetHeight); + console.log('Buffer size:', bufferSize); + console.log( + 'Actual dimensions:', + Math.round(actualWidth), + 'x', + Math.round(actualHeight) + ); + + return { + data: buffer, + width: Math.round(actualWidth), + height: Math.round(actualHeight), + channels: 4, // RGBA + }; + } catch (error) { + console.error('Error loading image with NitroImage:', error); + throw error; + } +} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -42,10 +92,41 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - const output = await ssdLite.forward(imageUri); + console.log('Running forward with string URI...'); + const output = await ssdLite.forward(imageUri, 0.5); + console.log('String URI result:', output.length, 'detections'); setResults(output); } catch (e) { - console.error(e); + console.error('Error in runForward:', e); + } + } + }; + + const runForwardPixels = async () => { 
+ if (imageUri && imageDimensions) { + try { + console.log('Converting image to pixel data...'); + // Resize to 640x640 to avoid memory issues + const intermediateSize = 640; + const pixelData = await imageUriToPixelData( + imageUri, + intermediateSize, + intermediateSize + ); + + console.log('Running forward with pixel data...', { + width: pixelData.width, + height: pixelData.height, + channels: pixelData.channels, + dataSize: pixelData.data.byteLength, + }); + + // Run inference using unified forward() API + const output = await ssdLite.forward(pixelData, 0.5); + console.log('Pixel data result:', output.length, 'detections'); + setResults(output); + } catch (e) { + console.error('Error in runForwardPixels:', e); } } }; @@ -81,10 +162,41 @@ export default function ObjectDetectionScreen() { )} - + + {/* Custom bottom bar with two buttons */} + + + handleCameraPress(false)}> + πŸ“· Gallery + + + + + + Run (String) + + + + Run (Pixels) + + + ); } @@ -129,4 +241,43 @@ const styles = StyleSheet.create({ width: '100%', height: '100%', }, + bottomContainer: { + width: '100%', + gap: 15, + alignItems: 'center', + padding: 16, + flex: 1, + }, + bottomIconsContainer: { + flexDirection: 'row', + justifyContent: 'center', + width: '100%', + }, + iconText: { + fontSize: 16, + color: ColorPalette.primary, + }, + buttonsRow: { + flexDirection: 'row', + width: '100%', + gap: 10, + }, + button: { + height: 50, + justifyContent: 'center', + alignItems: 'center', + backgroundColor: ColorPalette.primary, + color: '#fff', + borderRadius: 8, + }, + halfButton: { + flex: 1, + }, + buttonDisabled: { + opacity: 0.5, + }, + buttonText: { + color: '#fff', + fontSize: 16, + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index d6489c9be..8b0384626 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ 
b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -433,4 +433,4 @@ template class ModelHostObject : public JsiHostObject { std::shared_ptr callInvoker; }; -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 2d7612f25..fdf8c9dba 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -12,8 +12,13 @@ template concept SameAs = std::is_same_v; template -concept HasGenerate = requires(T t) { - { &T::generate }; +concept HasGenerateFromString = requires(T t) { + { &T::generateFromString }; +}; + +template +concept HasGenerateFromPixels = requires(T t) { + { &T::generateFromPixels }; }; template diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..9a1d6429b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..34ad8dffd 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..8eed3c888 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch +} // namespace rnexecutorch \ No newline at end of file diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index f056cff62..762d09987 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,52 +1,143 @@ -import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource, PixelData } from '../../types/common'; -import { Detection } from '../../types/objectDetection'; +import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; -import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { Logger } from 
'../../common/Logger'; -import { VisionModule } from './VisionModule'; +import { RnExecutorchError } from '../../errors/errorUtils'; +import { Frame, PixelData, ScalarType } from '../../types/common'; /** - * Module for object detection tasks. + * Base class for computer vision models that support multiple input types. + * + * VisionModule extends BaseModule with: + * - Unified `forward()` API accepting string paths or raw pixel data + * - `runOnFrame` getter for real-time VisionCamera frame processing + * - Shared frame processor creation logic + * + * Subclasses should only implement model-specific loading logic. * * @category Typescript API */ -export class ObjectDetectionModule extends VisionModule { +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + +export abstract class VisionModule extends BaseModule { /** - * Loads the model, where `modelSource` is a string that specifies the location of the model binary. - * To track the download progress, supply a callback function `onDownloadProgressCallback`. + * Synchronous worklet function for real-time VisionCamera frame processing. + * + * Only available after the model is loaded. Returns null if not loaded. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * @example + * ```typescript + * const model = new ClassificationModule(); + * await model.load({ modelSource: MODEL }); * - * @param model - Object containing `modelSource`. - * @param onDownloadProgressCallback - Optional callback to monitor download progress. 
+ * // Use the functional form of setState to store the worklet β€” passing it + * // directly would cause React to invoke it immediately as an updater fn. + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => model.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` */ - async load( - model: { modelSource: ResourceSource }, - onDownloadProgressCallback: (progress: number) => void = () => {} - ): Promise { - try { - const paths = await ResourceFetcher.fetch( - onDownloadProgressCallback, - model.modelSource - ); + get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } - if (!paths?.[0]) { - throw new RnExecutorchError( - RnExecutorchErrorCode.DownloadInterrupted, - 'The download has been interrupted. As a result, not every file was downloaded. Please retry the download.' - ); - } + // Extract pure JSI function reference (runs on JS thread) + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; - this.nativeModule = global.loadObjectDetection(paths[0]); - } catch (error) { - Logger.error('Load failed:', error); - throw parseUnknownError(error); - } + // Return worklet that captures ONLY the JSI function + return (frame: any, ...args: any[]): TOutput => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData, ...args); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; } - async forward( - input: string | PixelData, - detectionThreshold: number = 0.5 - ): Promise { - return super.forward(input, detectionThreshold); + /** + * Executes the model's forward pass with automatic input type detection. 
+ * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * This method is async and cannot be called in worklet context. + * + * @param input - Image source (string path or PixelData object) + * @param args - Additional model-specific arguments + * @returns A Promise that resolves to the model output. + * + * @example + * ```typescript + * // String path (async) + * const result1 = await model.forward('file:///path/to/image.jpg'); + * + * // Pixel data (async) + * const result2 = await model.forward({ + * dataPtr: new Uint8Array(pixelBuffer), + * sizes: [480, 640, 3], + * scalarType: ScalarType.BYTE + * }); + * + * // For VisionCamera frames, use runOnFrame in worklet: + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!model.runOnFrame) return; + * const result = model.runOnFrame(frame); + * } + * }); + * ``` + */ + async forward(input: string | PixelData, ...args: any[]): Promise { + if (this.nativeModule == null) + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + + // Type detection and routing + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input, ...args); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input, ...args); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } } } From fafb2cce1c26beb6740a90929f0290752c1d5d4b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 17 Feb 2026 17:51:10 +0100 Subject: [PATCH 26/37] refactor: errors, logs, unnecessary comments, use existing TensorPtr --- .../app/object_detection/index.tsx | 61 ++++++++++--------- .../host_objects/JsiConversions.h | 19 ++++++ 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 9e60589fb..54c0eb18f 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper'; import ColorPalette from '../../colors'; import { Images } from 'react-native-nitro-image'; -// Helper function to convert image URI to raw pixel data using NitroImage +// Helper function to convert BGRA to RGB +function convertBGRAtoRGB( + buffer: ArrayBuffer, + width: number, + height: number +): ArrayBuffer { + const source = new Uint8Array(buffer); + const rgb = new Uint8Array(width * height * 3); + + for (let i = 0; i < width * height; i++) { + // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] + rgb[i * 3 + 0] = source[i * 4 + 2]; // R + rgb[i * 3 + 1] = source[i * 4 + 1]; // G + rgb[i * 3 + 2] = source[i * 4 + 0]; // B + } + + return rgb.buffer; +} + +// Helper function to convert image URI to raw RGB pixel data async function imageUriToPixelData( uri: string, targetWidth: number, @@ -29,32 +48,19 @@ async function imageUriToPixelData( const image = await Images.loadFromFileAsync(uri); const resized = image.resize(targetWidth, targetHeight); - // Get pixel data as ArrayBuffer (RGBA format) - const pixelData = resized.toRawPixelData(); + // Get pixel data as ArrayBuffer (BGRA format from NitroImage) + const rawPixelData = resized.toRawPixelData(); const buffer = - pixelData instanceof ArrayBuffer ? 
pixelData : pixelData.buffer; - - // Calculate actual buffer dimensions (accounts for device pixel ratio) - const bufferSize = buffer?.byteLength || 0; - const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel - const aspectRatio = targetWidth / targetHeight; - const actualHeight = Math.sqrt(totalPixels / aspectRatio); - const actualWidth = totalPixels / actualHeight; + rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer; - console.log('Requested:', targetWidth, 'x', targetHeight); - console.log('Buffer size:', bufferSize); - console.log( - 'Actual dimensions:', - Math.round(actualWidth), - 'x', - Math.round(actualHeight) - ); + // Convert BGRA to RGB as required by the native API + const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); return { - data: buffer, - width: Math.round(actualWidth), - height: Math.round(actualHeight), - channels: 4, // RGBA + data: rgbBuffer, + width: targetWidth, + height: targetHeight, + channels: 3, // RGB }; } catch (error) { console.error('Error loading image with NitroImage:', error); @@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() { if (imageUri && imageDimensions) { try { console.log('Converting image to pixel data...'); - // Resize to 640x640 to avoid memory issues - const intermediateSize = 640; + // Use original dimensions - let the model resize internally const pixelData = await imageUriToPixelData( imageUri, - intermediateSize, - intermediateSize + imageDimensions.width, + imageDimensions.height ); console.log('Running forward with pixel data...', { @@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() { }); // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.5); + const output = await ssdLite.forward(pixelData, 0.3); console.log('Pixel data result:', output.length, 'detections'); setResults(output); } catch (e) { diff --git 
a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 7b97108b9..f4bfe09a6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,6 +369,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + // JS numbers are doubles. Large uint64s > 2^53 will lose precision. + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + +inline jsi::Value getJsiValue(const std::vector &vec, + jsi::Runtime &runtime) { + jsi::Array array(runtime, vec.size()); + for (size_t i = 0; i < vec.size(); i++) { + array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); + } + return {runtime, array}; +} + inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } From 081d6ac3e60dec970988d699a1d0bc3b8484266d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 18 Feb 2026 13:03:22 +0100 Subject: [PATCH 27/37] refactor: add or remove empty lines --- .../rnexecutorch/models/classification/Classification.cpp | 2 +- .../rnexecutorch/models/embeddings/image/ImageEmbeddings.h | 2 +- .../models/image_segmentation/BaseImageSegmentation.h | 2 +- .../common/rnexecutorch/models/style_transfer/StyleTransfer.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index b9fad1b88..0fba07108 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) { return probs; } -} // namespace rnexecutorch::models::classification \ No newline at end of file +} // namespace rnexecutorch::models::classification diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 9a1d6429b..7e114e939 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -27,4 +27,4 @@ class ImageEmbeddings final : public BaseEmbeddings { REGISTER_CONSTRUCTOR(models::embeddings::ImageEmbeddings, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index 34ad8dffd..f46f41d69 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -58,4 +58,4 @@ class BaseImageSegmentation : public BaseModel { REGISTER_CONSTRUCTOR(models::image_segmentation::BaseImageSegmentation, std::string, std::vector, std::vector, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h 
b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 8eed3c888..73744c4d8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -33,4 +33,4 @@ class StyleTransfer : public BaseModel { REGISTER_CONSTRUCTOR(models::style_transfer::StyleTransfer, std::string, std::shared_ptr); -} // namespace rnexecutorch \ No newline at end of file +} // namespace rnexecutorch From 6192a4a7b62a96ce59254ad8c602f0c5c354a06f Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 19 Feb 2026 22:34:20 +0100 Subject: [PATCH 28/37] fix: errors after rebase --- .../common/rnexecutorch/host_objects/JsiConversions.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index f4bfe09a6..586d924d3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,16 +369,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - // JS numbers are doubles. Large uint64s > 2^53 will lose precision. 
- array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(const std::vector &vec, jsi::Runtime &runtime) { jsi::Array array(runtime, vec.size()); From f9108652c17666ae9732de7807d8f715c901ef8b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Tue, 24 Feb 2026 09:02:14 +0100 Subject: [PATCH 29/37] feat: suggested changes / improve comments --- .../common/rnexecutorch/host_objects/JsiConversions.h | 9 --------- .../common/rnexecutorch/metaprogramming/TypeConcepts.h | 5 +++++ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 586d924d3..7b97108b9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -369,15 +369,6 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) { return {runtime, bigInt}; } -inline jsi::Value getJsiValue(const std::vector &vec, - jsi::Runtime &runtime) { - jsi::Array array(runtime, vec.size()); - for (size_t i = 0; i < vec.size(); i++) { - array.setValueAtIndex(runtime, i, jsi::Value(static_cast(vec[i]))); - } - return {runtime, array}; -} - inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) { return {runtime, val}; } diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index fdf8c9dba..216e2bae3 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -11,6 +11,11 @@ concept DerivedFromOrSameAs = std::is_base_of_v; template concept SameAs = std::is_same_v; 
+template +concept HasGenerate = requires(T t) { + { &T::generate }; +}; + template concept HasGenerateFromString = requires(T t) { { &T::generateFromString }; From 0a8493b1deb4c4ef642fe0de8e03ac635642173e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 13:40:50 +0100 Subject: [PATCH 30/37] feat: make all cv models compatible with Vision Camera --- apps/computer-vision/app/_layout.tsx | 40 + .../app/classification_live/index.tsx | 255 ++++++ .../app/image_segmentation_live/index.tsx | 292 +++++++ .../app/object_detection_live/index.tsx | 101 ++- apps/computer-vision/app/ocr_live/index.tsx | 329 ++++++++ .../app/style_transfer/index.tsx | 73 +- .../app/style_transfer_live/index.tsx | 274 ++++++ .../app/vision_camera_live/index.tsx | 798 ++++++++++++++++++ .../host_objects/JsiConversions.h | 51 ++ .../rnexecutorch/models/VisionModel.cpp | 11 +- .../common/rnexecutorch/models/VisionModel.h | 14 + .../models/classification/Classification.cpp | 66 +- .../models/classification/Classification.h | 21 +- .../embeddings/image/ImageEmbeddings.cpp | 72 +- .../models/embeddings/image/ImageEmbeddings.h | 22 +- .../BaseImageSegmentation.cpp | 143 ++-- .../BaseImageSegmentation.h | 44 +- .../models/image_segmentation/Types.h | 17 + .../object_detection/ObjectDetection.cpp | 5 +- .../common/rnexecutorch/models/ocr/OCR.cpp | 66 +- .../common/rnexecutorch/models/ocr/OCR.h | 11 +- .../models/style_transfer/StyleTransfer.cpp | 94 ++- .../models/style_transfer/StyleTransfer.h | 27 +- .../models/style_transfer/Types.h | 14 + .../models/vertical_ocr/VerticalOCR.cpp | 70 +- .../models/vertical_ocr/VerticalOCR.h | 11 +- .../tests/integration/ClassificationTest.cpp | 16 +- .../tests/integration/ImageEmbeddingsTest.cpp | 16 +- .../tests/integration/OCRTest.cpp | 16 +- .../tests/integration/StyleTransferTest.cpp | 43 +- .../tests/integration/VerticalOCRTest.cpp | 41 +- .../src/controllers/BaseOCRController.ts | 57 +- .../computer_vision/useImageSegmentation.ts | 16 + 
.../src/hooks/computer_vision/useOCR.ts | 14 +- .../hooks/computer_vision/useVerticalOCR.ts | 14 +- .../src/hooks/useModule.ts | 2 + .../computer_vision/ClassificationModule.ts | 25 +- .../computer_vision/ImageEmbeddingsModule.ts | 22 +- .../ImageSegmentationModule.ts | 134 ++- .../computer_vision/StyleTransferModule.ts | 21 +- .../src/types/classification.ts | 45 +- .../src/types/imageEmbeddings.ts | 27 +- .../src/types/imageSegmentation.ts | 35 +- .../react-native-executorch/src/types/ocr.ts | 32 +- .../src/types/styleTransfer.ts | 29 +- 45 files changed, 3231 insertions(+), 295 deletions(-) create mode 100644 apps/computer-vision/app/classification_live/index.tsx create mode 100644 apps/computer-vision/app/image_segmentation_live/index.tsx create mode 100644 apps/computer-vision/app/ocr_live/index.tsx create mode 100644 apps/computer-vision/app/style_transfer_live/index.tsx create mode 100644 apps/computer-vision/app/vision_camera_live/index.tsx create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index 3970ac316..b614b54bf 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -91,6 +91,46 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> + + + + + { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [topLabel, setTopLabel] = useState(''); + const [topScore, setTopScore] = useState(0); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateStats = useCallback( + (result: { label: string; score: number }) => { + setTopLabel(result.label); + setTopScore(result.score); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, + [] + ); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame); + if (result) { + // find the top-1 entry + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + scheduleOnRN(updateStats, { label: bestLabel, score: bestScore }); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + + + + + {topLabel || 'β€”'} + + + {topLabel ? 
(topScore * 100).toFixed(1) + '%' : ''} + + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + paddingHorizontal: 16, + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + maxWidth: '100%', + }, + labelContainer: { + flex: 1, + alignItems: 'flex-start', + }, + labelText: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + scoreText: { + color: 'rgba(255,255,255,0.7)', + fontSize: 13, + fontWeight: '500', + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/image_segmentation_live/index.tsx b/apps/computer-vision/app/image_segmentation_live/index.tsx new file mode 100644 index 000000000..f665c63c5 --- /dev/null +++ b/apps/computer-vision/app/image_segmentation_live/index.tsx @@ -0,0 +1,292 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + 
useWindowDimensions, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + useImageSegmentation, +} from 'react-native-executorch'; +import { + Canvas, + Image as SkiaImage, + Skia, + AlphaType, + ColorType, + SkImage, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +// RGBA colors for each DeepLab V3 class (alpha = 180 for semi-transparency) +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], // 0 background β€” transparent + [51, 255, 87, 180], // 1 aeroplane + [51, 87, 255, 180], // 2 bicycle + [255, 51, 246, 180], // 3 bird + [51, 255, 246, 180], // 4 boat + [243, 255, 51, 180], // 5 bottle + [141, 51, 255, 180], // 6 bus + [255, 131, 51, 180], // 7 car + [51, 255, 131, 180], // 8 cat + [131, 51, 255, 180], // 9 chair + [255, 255, 51, 180], // 10 cow + [51, 255, 255, 180], // 11 diningtable + [255, 51, 143, 180], // 12 dog + [127, 51, 255, 180], // 13 horse + [51, 255, 175, 180], // 14 motorbike + [255, 175, 51, 180], // 15 person + [179, 255, 51, 180], // 16 pottedplant + [255, 87, 51, 180], // 17 sheep + [255, 51, 162, 180], // 18 sofa + [51, 162, 255, 180], // 19 train + [162, 51, 255, 180], // 20 tvmonitor +]; + +export default function ImageSegmentationLiveScreen() { + const insets = useSafeAreaInsets(); + const { width: screenWidth, height: screenHeight } = useWindowDimensions(); + + const { isReady, isGenerating, downloadProgress, runOnFrame } = + useImageSegmentation({ model: DEEPLAB_V3_RESNET50 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(isGenerating); + }, 
[isGenerating, setGlobalGenerating]); + + const [maskImage, setMaskImage] = useState(null); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateMask = useCallback((img: SkImage) => { + setMaskImage(img); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + // Model output is always square (modelImageSize Γ— modelImageSize). + // Derive width/height from argmax length (sqrt for square output). + const side = Math.round(Math.sqrt(argmax.length)); + const width = side; + const height = side; + + // Build RGBA pixel buffer on the worklet thread to avoid transferring + // the large Int32Array across the workletβ†’RN boundary via scheduleOnRN. + const pixels = new Uint8Array(width * height * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]] ?? 
[0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + if (img) { + scheduleOnRN(updateMask, img); + } + } + } catch (e) { + console.log('frame error:', String(e)); + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + + + {maskImage && ( + + + + )} + + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, +}); diff --git 
a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx index cd1e9cca8..d883fe8b9 100644 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ b/apps/computer-vision/app/object_detection_live/index.tsx @@ -35,6 +35,7 @@ import ColorPalette from '../../colors'; export default function ObjectDetectionLiveScreen() { const insets = useSafeAreaInsets(); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); const { setGlobalGenerating } = useContext(GeneratingContext); @@ -43,7 +44,8 @@ export default function ObjectDetectionLiveScreen() { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); - const [detectionCount, setDetectionCount] = useState(0); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); const [fps, setFps] = useState(0); const lastFrameTimeRef = useRef(Date.now()); @@ -60,15 +62,23 @@ export default function ObjectDetectionLiveScreen() { } }, [device]); - const updateStats = useCallback((results: Detection[]) => { - setDetectionCount(results.length); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); + const updateDetections = useCallback( + (payload: { + results: Detection[]; + imageWidth: number; + imageHeight: number; + }) => { + setDetections(payload.results); + setImageSize({ width: payload.imageWidth, height: payload.imageHeight }); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, + [] + ); const frameOutput = useFrameOutput({ pixelFormat: 'rgb', @@ -79,10 +89,19 @@ export default function ObjectDetectionLiveScreen() 
{ frame.dispose(); return; } + // After 90Β° CW rotation, the image fed to the model has swapped dims. + const imageWidth = + frame.width > frame.height ? frame.height : frame.width; + const imageHeight = + frame.width > frame.height ? frame.width : frame.height; try { const result = model.runOnFrame(frame, 0.5); if (result) { - scheduleOnRN(updateStats, result); + scheduleOnRN(updateDetections, { + results: result, + imageWidth, + imageHeight, + }); } } catch { // ignore frame errors @@ -135,13 +154,51 @@ export default function ObjectDetectionLiveScreen() { format={format} /> + {/* Bounding box overlay β€” measured to match the exact camera preview area */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {(() => { + // Cover-fit: camera preview scales to fill the canvas, cropping the + // excess. Compute the same transform so bbox pixel coords map correctly. + const scale = Math.max( + canvasSize.width / imageSize.width, + canvasSize.height / imageSize.height + ); + const offsetX = (canvasSize.width - imageSize.width * scale) / 2; + const offsetY = (canvasSize.height - imageSize.height * scale) / 2; + return detections.map((det, i) => { + const left = det.bbox.x1 * scale + offsetX; + const top = det.bbox.y1 * scale + offsetY; + const width = (det.bbox.x2 - det.bbox.x1) * scale; + const height = (det.bbox.y2 - det.bbox.y1) * scale; + return ( + + + + {det.label} {(det.score * 100).toFixed(0)}% + + + + ); + }); + })()} + + - {detectionCount} + {detections.length} objects @@ -183,6 +240,26 @@ const styles = StyleSheet.create({ fontWeight: '600', letterSpacing: 0.3, }, + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: ColorPalette.primary, + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + backgroundColor: ColorPalette.primary, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { + color: 'white', + fontSize: 11, + 
fontWeight: '600', + }, bottomBarWrapper: { position: 'absolute', bottom: 0, diff --git a/apps/computer-vision/app/ocr_live/index.tsx b/apps/computer-vision/app/ocr_live/index.tsx new file mode 100644 index 000000000..a0c93899f --- /dev/null +++ b/apps/computer-vision/app/ocr_live/index.tsx @@ -0,0 +1,329 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; + +import { + Camera, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { scheduleOnRN } from 'react-native-worklets'; +import { OCR_ENGLISH, useOCR, OCRDetection } from 'react-native-executorch'; +import { + Canvas, + Path, + Skia, + Text as SkiaText, + matchFont, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +interface FrameDetections { + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; +} + +export default function OCRLiveScreen() { + const insets = useSafeAreaInsets(); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + + const { isReady, isGenerating, downloadProgress, runOnFrame } = useOCR({ + model: OCR_ENGLISH, + }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + useEffect(() => { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [frameDetections, setFrameDetections] = useState({ + detections: [], + frameWidth: 1, + frameHeight: 1, + }); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); + + const cameraPermission = useCameraPermission(); + const 
devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateDetections = useCallback((result: FrameDetections) => { + setFrameDetections(result); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + dropFramesWhileBusy: true, + pixelFormat: 'rgb', + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + const frameWidth = frame.width; + const frameHeight = frame.height; + try { + const result = runOnFrame(frame); + if (result) { + scheduleOnRN(updateDetections, { + detections: result, + frameWidth, + frameHeight, + }); + } + } catch { + // ignore frame errors + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + const { detections, frameWidth, frameHeight } = frameDetections; + + // OCR runs on the raw landscape frame (no rotation applied in native). + // The camera preview displays it as portrait (90Β° CW rotation applied by iOS). + // After rotation the image dimensions become (frameHeight Γ— frameWidth). + // Cover-fit scale uses post-rotation dims to match what the preview shows. + const isLandscape = frameWidth > frameHeight; + const imageW = isLandscape ? frameHeight : frameWidth; + const imageH = isLandscape ? 
frameWidth : frameHeight; + const scale = Math.max(canvasSize.width / imageW, canvasSize.height / imageH); + const offsetX = (canvasSize.width - imageW * scale) / 2; + const offsetY = (canvasSize.height - imageH * scale) / 2; + + // Map a raw landscape point to screen coords accounting for rotation + cover-fit. + function toScreenX(px: number, py: number) { + // After 90Β° CW: rotated_x = frameHeight - py, rotated_y = px + const rx = isLandscape ? frameHeight - py : px; + return rx * scale + offsetX; + } + function toScreenY(px: number, py: number) { + const ry = isLandscape ? px : py; + return ry * scale + offsetY; + } + + return ( + + + + + + {/* Measure the overlay area, then draw polygons inside a Canvas */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + + {detections.map((det, i) => { + if (!det.bbox || det.bbox.length < 2) return null; + + const path = Skia.Path.Make(); + path.moveTo( + toScreenX(det.bbox[0]!.x, det.bbox[0]!.y), + toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) + ); + for (let j = 1; j < det.bbox.length; j++) { + path.lineTo( + toScreenX(det.bbox[j]!.x, det.bbox[j]!.y), + toScreenY(det.bbox[j]!.x, det.bbox[j]!.y) + ); + } + path.close(); + + const labelX = toScreenX(det.bbox[0]!.x, det.bbox[0]!.y); + const labelY = Math.max( + 0, + toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 + ); + + return ( + + + + {font && ( + + )} + + ); + })} + + + + + + + {detections.length} + regions + + + + {fps} + fps + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 
0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index a1b3a7834..466900a6f 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -5,6 +5,14 @@ import { useStyleTransfer, STYLE_TRANSFER_CANDY, } from 'react-native-executorch'; +import { + Canvas, + Image as SkiaImage, + Skia, + AlphaType, + ColorType, + SkImage, +} from '@shopify/react-native-skia'; import { View, StyleSheet, Image } from 'react-native'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; @@ -16,12 +24,16 @@ export default function StyleTransferScreen() { useEffect(() => { setGlobalGenerating(model.isGenerating); }, [model.isGenerating, setGlobalGenerating]); + const [imageUri, setImageUri] = useState(''); + const [styledImage, setStyledImage] = useState(null); + const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); const uri = image?.uri; if (typeof uri === 'string') { - setImageUri(uri as string); + setImageUri(uri); + setStyledImage(null); } }; @@ -29,7 +41,29 @@ export default function StyleTransferScreen() { if (imageUri) { try { const output = await model.forward(imageUri); - 
setImageUri(output); + const height = output.sizes[0]; + const width = output.sizes[1]; + // Convert RGB -> RGBA for Skia + const rgba = new Uint8Array(width * height * 4); + const rgb = output.dataPtr; + for (let i = 0; i < width * height; i++) { + rgba[i * 4] = rgb[i * 3]; + rgba[i * 4 + 1] = rgb[i * 3 + 1]; + rgba[i * 4 + 2] = rgb[i * 3 + 2]; + rgba[i * 4 + 3] = 255; + } + const skData = Skia.Data.fromBytes(rgba); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + setStyledImage(img); } catch (e) { console.error(e); } @@ -48,15 +82,28 @@ export default function StyleTransferScreen() { return ( - + {styledImage ? ( + + + + ) : ( + + )} { + setGlobalGenerating(isGenerating); + }, [isGenerating, setGlobalGenerating]); + + const [styledImage, setStyledImage] = useState(null); + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; + + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const updateImage = useCallback((img: SkImage) => { + setStyledImage((prev) => { + prev?.dispose(); + return img; + }); + const now = Date.now(); + const timeDiff = now - lastFrameTimeRef.current; + if (timeDiff > 0) { + setFps(Math.round(1000 / timeDiff)); + } + lastFrameTimeRef.current = now; + }, []); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame(frame) { + 'worklet'; + if (!runOnFrame) { + frame.dispose(); + return; + } + try { + const result = runOnFrame(frame); + if (result?.dataPtr) { + const { dataPtr, sizes } = result; + const height = sizes[0]; + const width = sizes[1]; + // Build Skia image on the worklet thread β€” avoids transferring the + // large pixel buffer across the workletβ†’RN boundary via scheduleOnRN. 
+ const skData = Skia.Data.fromBytes(dataPtr); + const img = Skia.Image.MakeImage( + { + width, + height, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + width * 4 + ); + if (img) { + scheduleOnRN(updateImage, img); + } + } + } catch (e) { + console.log('frame error:', String(e)); + } finally { + frame.dispose(); + } + }, + }); + + if (!isReady) { + return ( + + ); + } + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + return ( + + + + {/* Camera always runs to keep frame processing active */} + + + {/* Styled output overlays the camera feed once available */} + {styledImage && ( + + + + )} + + + + + {fps} + fps + + + + candy + style + + + + + ); +} + +const styles = StyleSheet.create({ + container: { + flex: 1, + backgroundColor: 'black', + }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { + color: 'white', + fontSize: 18, + }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { + color: 'white', + fontSize: 15, + fontWeight: '600', + letterSpacing: 0.3, + }, + bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0, 0, 0, 0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + statItem: { + alignItems: 'center', + }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + styleLabel: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + 
textTransform: 'uppercase', + letterSpacing: 0.8, + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, +}); diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx new file mode 100644 index 000000000..4c7b425b1 --- /dev/null +++ b/apps/computer-vision/app/vision_camera_live/index.tsx @@ -0,0 +1,798 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + ScrollView, + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { + Camera, + Frame, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { createSynchronizable, runOnJS } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + Detection, + EFFICIENTNET_V2_S, + OCRDetection, + OCR_ENGLISH, + SSDLITE_320_MOBILENET_V3_LARGE, + STYLE_TRANSFER_RAIN_PRINCESS, + useClassification, + useImageSegmentation, + useObjectDetection, + useOCR, + useStyleTransfer, +} from 'react-native-executorch'; +import { + AlphaType, + Canvas, + ColorType, + Image as SkiaImage, + matchFont, + Path, + Skia, + SkImage, + Text as SkiaText, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +// ─── Model IDs ─────────────────────────────────────────────────────────────── + +type ModelId = + | 'classification' + | 'object_detection' + | 'segmentation' + | 'style_transfer' + | 'ocr'; + +const MODELS: { id: ModelId; label: string }[] = [ + { id: 'classification', label: 'Classification' }, + { id: 'object_detection', label: 'Object Detection' }, + { id: 'segmentation', label: 'Segmentation' }, + { id: 'style_transfer', label: 'Style 
Transfer' }, + { id: 'ocr', label: 'OCR' }, +]; + +// ─── Segmentation colors ───────────────────────────────────────────────────── + +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], + [51, 255, 87, 180], + [51, 87, 255, 180], + [255, 51, 246, 180], + [51, 255, 246, 180], + [243, 255, 51, 180], + [141, 51, 255, 180], + [255, 131, 51, 180], + [51, 255, 131, 180], + [131, 51, 255, 180], + [255, 255, 51, 180], + [51, 255, 255, 180], + [255, 51, 143, 180], + [127, 51, 255, 180], + [51, 255, 175, 180], + [255, 175, 51, 180], + [179, 255, 51, 180], + [255, 87, 51, 180], + [255, 51, 162, 180], + [51, 162, 255, 180], + [162, 51, 255, 180], +]; + +// ─── Kill switch β€” synchronizable boolean shared between JS and worklet thread. +// setBlocking(true) immediately stops the worklet from dispatching new work +// (both in onFrame and inside the async callback) before the old model tears down. +const frameKillSwitch = createSynchronizable(false); + +// ─── Screen ────────────────────────────────────────────────────────────────── + +export default function VisionCameraLiveScreen() { + const insets = useSafeAreaInsets(); + const [activeModel, setActiveModel] = useState('classification'); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + // ── Models (only the active model loads; others are prevented) ── + const classification = useClassification({ + model: EFFICIENTNET_V2_S, + preventLoad: activeModel !== 'classification', + }); + const objectDetection = useObjectDetection({ + model: SSDLITE_320_MOBILENET_V3_LARGE, + preventLoad: activeModel !== 'object_detection', + }); + const segmentation = useImageSegmentation({ + model: DEEPLAB_V3_RESNET50, + preventLoad: activeModel !== 'segmentation', + }); + const styleTransfer = useStyleTransfer({ + model: STYLE_TRANSFER_RAIN_PRINCESS, + preventLoad: activeModel !== 'style_transfer', + }); + const ocr = useOCR({ + model: OCR_ENGLISH, + 
preventLoad: activeModel !== 'ocr', + }); + + const activeIsGenerating = { + classification: classification.isGenerating, + object_detection: objectDetection.isGenerating, + segmentation: segmentation.isGenerating, + style_transfer: styleTransfer.isGenerating, + ocr: ocr.isGenerating, + }[activeModel]; + + useEffect(() => { + setGlobalGenerating(activeIsGenerating); + }, [activeIsGenerating, setGlobalGenerating]); + + // ── Camera ── + const [fps, setFps] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + // ── Per-model result state ── + const [classResult, setClassResult] = useState({ label: '', score: 0 }); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); + const [maskImage, setMaskImage] = useState(null); + const [styledImage, setStyledImage] = useState(null); + const [ocrData, setOcrData] = useState<{ + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; + }>({ detections: [], frameWidth: 1, frameHeight: 1 }); + + // ── Stable callbacks ── + function tick() { + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) setFps(Math.round(1000 / diff)); + lastFrameTimeRef.current = now; + } + + const updateClass = useCallback((r: { label: string; score: number }) => { + setClassResult(r); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateDetections = useCallback( + (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { + setDetections(p.results); + setImageSize({ width: p.imageWidth, height: p.imageHeight }); + 
tick(); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [] + ); + + const updateMask = useCallback((img: SkImage) => { + setMaskImage((prev) => { + prev?.dispose(); + return img; + }); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateStyled = useCallback((img: SkImage) => { + setStyledImage((prev) => { + prev?.dispose(); + return img; + }); + tick(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const updateOcr = useCallback( + (d: { + detections: OCRDetection[]; + frameWidth: number; + frameHeight: number; + }) => { + setOcrData(d); + tick(); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [] + ); + + // ── runOnJS-wrapped callbacks β€” created on the RN thread so the Babel plugin + // can serialize them into remote functions. These can then be safely called + // from any worklet runtime, including the asyncRunner's worker runtime. + const notifyClass = runOnJS(updateClass); + const notifyDetections = runOnJS(updateDetections); + const notifyMask = runOnJS(updateMask); + const notifyStyled = runOnJS(updateStyled); + const notifyOcr = runOnJS(updateOcr); + + // ── Pull the active model's runOnFrame out of the hook each render. + // These are worklet functions (not plain JS objects), so they CAN be + // captured directly in a useCallback closure β€” the worklets runtime + // serializes them correctly. A new closure is produced whenever the + // active runOnFrame changes, causing useFrameOutput to re-register. + const classRof = classification.runOnFrame; + const detRof = objectDetection.runOnFrame; + const segRof = segmentation.runOnFrame; + const stRof = styleTransfer.runOnFrame; + const ocrRof = ocr.runOnFrame; + + // When switching models: activate kill switch synchronously so the worklet + // thread stops calling runOnFrame before delete() fires on the old model. + // Then re-enable once the new model's preventLoad has taken effect. 
+ useEffect(() => { + frameKillSwitch.setBlocking(true); + setMaskImage((prev) => { + prev?.dispose(); + return null; + }); + setStyledImage((prev) => { + prev?.dispose(); + return null; + }); + const id = setTimeout(() => { + frameKillSwitch.setBlocking(false); + }, 300); + return () => clearTimeout(id); + }, [activeModel]); + + // ── Single frame output. + // onFrame is re-created (and re-registered by useFrameOutput) whenever the + // active model or its runOnFrame worklet changes. The kill switch provides + // synchronous cross-thread protection during the transition window. + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame: useCallback( + (frame: Frame) => { + 'worklet'; + + // Kill switch is set synchronously from JS when switching models β€” + // guaranteed visible here before the next frame is dispatched. + if (frameKillSwitch.getDirty()) { + frame.dispose(); + return; + } + + try { + if (activeModel === 'classification') { + if (!classRof) return; + const result = classRof(frame); + if (result) { + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]!; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + notifyClass({ + label: bestLabel, + score: bestScore, + }); + } + } else if (activeModel === 'object_detection') { + if (!detRof) return; + const iw = frame.width > frame.height ? frame.height : frame.width; + const ih = frame.width > frame.height ? 
frame.width : frame.height; + const result = detRof(frame, 0.5); + if (result) { + notifyDetections({ + results: result, + imageWidth: iw, + imageHeight: ih, + }); + } + } else if (activeModel === 'segmentation') { + if (!segRof) return; + const result = segRof(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + const side = Math.round(Math.sqrt(argmax.length)); + const pixels = new Uint8Array(side * side * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]!] ?? [0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width: side, + height: side, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + side * 4 + ); + if (img) notifyMask(img); + } + } else if (activeModel === 'style_transfer') { + if (!stRof) return; + const result = stRof(frame); + if (result?.dataPtr) { + const { dataPtr, sizes } = result; + const h = sizes[0]!; + const w = sizes[1]!; + const skData = Skia.Data.fromBytes(dataPtr); + const img = Skia.Image.MakeImage( + { + width: w, + height: h, + alphaType: AlphaType.Opaque, + colorType: ColorType.RGBA_8888, + }, + skData, + w * 4 + ); + if (img) notifyStyled(img); + } + } else if (activeModel === 'ocr') { + if (!ocrRof) return; + const fw = frame.width; + const fh = frame.height; + const result = ocrRof(frame); + if (result) { + notifyOcr({ + detections: result, + frameWidth: fw, + frameHeight: fh, + }); + } + } + } catch { + // ignore + } finally { + frame.dispose(); + } + }, + [ + activeModel, + classRof, + detRof, + segRof, + stRof, + ocrRof, + notifyClass, + notifyDetections, + notifyMask, + notifyStyled, + notifyOcr, + ] + ), + }); + + // ── Loading state: only care about the active model ── + const activeIsReady = { + classification: classification.isReady, + 
object_detection: objectDetection.isReady, + segmentation: segmentation.isReady, + style_transfer: styleTransfer.isReady, + ocr: ocr.isReady, + }[activeModel]; + + const activeDownloadProgress = { + classification: classification.downloadProgress, + object_detection: objectDetection.downloadProgress, + segmentation: segmentation.downloadProgress, + style_transfer: styleTransfer.downloadProgress, + ocr: ocr.downloadProgress, + }[activeModel]; + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + // ── Cover-fit helpers ── + function coverFit(imgW: number, imgH: number) { + const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); + return { + scale, + offsetX: (canvasSize.width - imgW * scale) / 2, + offsetY: (canvasSize.height - imgH * scale) / 2, + }; + } + + // ── OCR coord transform ── + const { + detections: ocrDets, + frameWidth: ocrFW, + frameHeight: ocrFH, + } = ocrData; + const ocrIsLandscape = ocrFW > ocrFH; + const ocrImgW = ocrIsLandscape ? ocrFH : ocrFW; + const ocrImgH = ocrIsLandscape ? ocrFW : ocrFH; + const { + scale: ocrScale, + offsetX: ocrOX, + offsetY: ocrOY, + } = coverFit(ocrImgW, ocrImgH); + function ocrToX(px: number, py: number) { + return (ocrIsLandscape ? ocrFH - py : px) * ocrScale + ocrOX; + } + function ocrToY(px: number, py: number) { + return (ocrIsLandscape ? 
px : py) * ocrScale + ocrOY; + } + + // ── Object detection cover-fit ── + const { + scale: detScale, + offsetX: detOX, + offsetY: detOY, + } = coverFit(imageSize.width, imageSize.height); + + const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); + + return ( + + + + + + {/* ── Overlays ── */} + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {activeModel === 'segmentation' && maskImage && ( + + + + )} + + {activeModel === 'style_transfer' && styledImage && ( + + + + )} + + {activeModel === 'object_detection' && ( + <> + {detections.map((det, i) => { + const left = det.bbox.x1 * detScale + detOX; + const top = det.bbox.y1 * detScale + detOY; + const w = (det.bbox.x2 - det.bbox.x1) * detScale; + const h = (det.bbox.y2 - det.bbox.y1) * detScale; + return ( + + + + {det.label} {(det.score * 100).toFixed(0)}% + + + + ); + })} + + )} + + {activeModel === 'ocr' && ( + + {ocrDets.map((det, i) => { + if (!det.bbox || det.bbox.length < 2) return null; + const path = Skia.Path.Make(); + path.moveTo( + ocrToX(det.bbox[0]!.x, det.bbox[0]!.y), + ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) + ); + for (let j = 1; j < det.bbox.length; j++) { + path.lineTo( + ocrToX(det.bbox[j]!.x, det.bbox[j]!.y), + ocrToY(det.bbox[j]!.x, det.bbox[j]!.y) + ); + } + path.close(); + const lx = ocrToX(det.bbox[0]!.x, det.bbox[0]!.y); + const ly = Math.max( + 0, + ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 + ); + return ( + + + + {font && ( + + )} + + ); + })} + + )} + + + {!activeIsReady && ( + + m.id === activeModel)?.label} ${(activeDownloadProgress * 100).toFixed(0)}%`} + /> + + )} + + + + {MODELS.map((m) => ( + setActiveModel(m.id)} + > + + {m.label} + + + ))} + + + + + + {activeModel === 'classification' && ( + + + {classResult.label || 'β€”'} + + {classResult.label ? 
( + + {(classResult.score * 100).toFixed(1)}% + + ) : null} + + )} + {activeModel === 'object_detection' && ( + + {detections.length} + objects + + )} + {activeModel === 'segmentation' && ( + + DeepLab V3 + segmentation + + )} + {activeModel === 'style_transfer' && ( + + Rain Princess + style + + )} + {activeModel === 'ocr' && ( + + {ocrDets.length} + regions + + )} + + + {fps} + fps + + + + + ); +} + +// ─── Styles ────────────────────────────────────────────────────────────────── + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: 'black' }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { color: 'white', fontSize: 18 }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, + loadingOverlay: { + ...StyleSheet.absoluteFillObject, + backgroundColor: 'rgba(0,0,0,0.6)', + justifyContent: 'center', + alignItems: 'center', + }, + topBarWrapper: { + position: 'absolute', + top: 0, + left: 0, + right: 0, + }, + pickerContent: { + paddingHorizontal: 12, + gap: 8, + }, + chip: { + paddingHorizontal: 16, + paddingVertical: 8, + borderRadius: 20, + backgroundColor: 'rgba(0,0,0,0.55)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.2)', + }, + chipActive: { + backgroundColor: ColorPalette.primary, + borderColor: ColorPalette.primary, + }, + chipText: { + color: 'rgba(255,255,255,0.8)', + fontSize: 13, + fontWeight: '600', + }, + chipTextActive: { color: 'white' }, + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: ColorPalette.primary, + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + backgroundColor: ColorPalette.primary, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, + 
bottomBarWrapper: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + bottomBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: 'rgba(0,0,0,0.55)', + borderRadius: 24, + paddingHorizontal: 28, + paddingVertical: 10, + gap: 24, + }, + resultContainer: { alignItems: 'flex-start', maxWidth: 220 }, + resultText: { + color: 'white', + fontSize: 16, + fontWeight: '700', + }, + resultSub: { + color: 'rgba(255,255,255,0.6)', + fontSize: 12, + fontWeight: '500', + }, + statDivider: { + width: 1, + height: 32, + backgroundColor: 'rgba(255,255,255,0.2)', + }, + statItem: { alignItems: 'center' }, + statValue: { + color: 'white', + fontSize: 22, + fontWeight: '700', + letterSpacing: -0.5, + }, + statLabel: { + color: 'rgba(255,255,255,0.55)', + fontSize: 11, + fontWeight: '500', + textTransform: 'uppercase', + letterSpacing: 0.8, + }, +}); diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 7b97108b9..d0cba9916 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -15,11 +15,13 @@ #include #include +#include #include #include #include #include #include +#include #include using namespace rnexecutorch::models::speech_to_text::types; @@ -559,4 +561,53 @@ inline jsi::Value getJsiValue(const TranscriptionResult &result, return obj; } +inline jsi::Value +getJsiValue(const models::style_transfer::PixelDataResult &result, + jsi::Runtime &runtime) { + jsi::Object obj(runtime); + + auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr); + auto uint8ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Uint8Array"); + auto uint8Array = + uint8ArrayCtor.callAsConstructor(runtime, arrayBuffer).getObject(runtime); + obj.setProperty(runtime, 
"dataPtr", uint8Array); + + auto sizesArray = jsi::Array(runtime, 3); + sizesArray.setValueAtIndex(runtime, 0, jsi::Value(result.height)); + sizesArray.setValueAtIndex(runtime, 1, jsi::Value(result.width)); + sizesArray.setValueAtIndex(runtime, 2, jsi::Value(4)); + obj.setProperty(runtime, "sizes", sizesArray); + + obj.setProperty(runtime, "scalarType", jsi::Value(0)); + + return obj; +} + +inline jsi::Value +getJsiValue(const models::image_segmentation::SegmentationResult &result, + jsi::Runtime &runtime) { + jsi::Object dict(runtime); + + auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, result.argmax); + auto int32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Int32Array"); + auto int32Array = int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) + .getObject(runtime); + dict.setProperty(runtime, "ARGMAX", int32Array); + + for (auto &[classLabel, owningBuffer] : *result.classBuffers) { + auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); + auto float32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Float32Array"); + auto float32Array = + float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) + .getObject(runtime); + dict.setProperty(runtime, jsi::String::createFromAscii(runtime, classLabel), + float32Array); + } + + return dict; +} + } // namespace rnexecutorch::jsi_conversion diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index c0ce049f2..8f67175c4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,7 +11,16 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - return ::rnexecutorch::utils::extractFrame(runtime, frameObj); + cv::Mat frame = 
::rnexecutorch::utils::extractFrame(runtime, frameObj);
+
+  // Camera sensors natively deliver frames in landscape orientation.
+  // Rotate 90° CW so all models receive upright portrait frames.
+  if (frame.cols > frame.rows) {
+    cv::Mat upright;
+    cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE);
+    return upright;
+  }
+  return frame;
 }
 
 cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
index e0ec03912..a2a461772 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h
@@ -53,6 +53,20 @@ class VisionModel : public BaseModel {
 
   virtual ~VisionModel() = default;
 
+  /**
+   * @brief Thread-safe unload that waits for any in-flight inference to
+   * complete
+   *
+   * Overrides BaseModel::unload() to acquire inference_mutex_ before
+   * resetting the module. This prevents a crash where BaseModel::unload()
+   * destroys module_ while generateFromFrame() is still executing on the
+   * VisionCamera worklet thread.
+ */ + void unload() noexcept { + std::scoped_lock lock(inference_mutex_); + BaseModel::unload(); + } + protected: /** * @brief Mutex to ensure thread-safe inference diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp index 0fba07108..2a00d5dce 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp @@ -12,7 +12,7 @@ namespace rnexecutorch::models::classification { Classification::Classification(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -32,20 +32,78 @@ Classification::Classification(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } +cv::Mat Classification::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::unordered_map -Classification::generate(std::string imageSource) { +Classification::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = 
preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; auto inputTensor = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]) - .first; + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { throw RnExecutorchError(forwardResult.error(), "The model's forward function did not succeed. " "Ensure the model input is correct."); } + return postprocess(forwardResult->at(0).toTensor()); } +std::unordered_map +Classification::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::unordered_map +Classification::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::unordered_map +Classification::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + return runInference(image); +} + std::unordered_map Classification::postprocess(const Tensor &tensor) { std::span resultData( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h index 1465fc5f9..473d9b4bb 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h @@ -3,25 +3,40 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include namespace rnexecutorch { namespace models::classification { using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class Classification : public BaseModel { 
+class Classification : public VisionModel { public: Classification(const std::string &modelSource, std::shared_ptr callInvoker); + [[nodiscard("Registered non-void function")]] std::unordered_map< std::string_view, float> - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] std::unordered_map< + std::string_view, float> + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::unordered_map runInference(cv::Mat image); + std::unordered_map postprocess(const Tensor &tensor); cv::Size modelImageSize{0, 0}; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp index ec3129e76..a82fffbb2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp @@ -1,17 +1,18 @@ #include "ImageEmbeddings.h" +#include + #include #include #include #include -#include namespace rnexecutorch::models::embeddings { ImageEmbeddings::ImageEmbeddings( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseEmbeddings(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputTensors = getAllInputShapes(); if (inputTensors.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -31,10 +32,43 @@ ImageEmbeddings::ImageEmbeddings( modelInputShape[modelInputShape.size() - 2]); } +cv::Mat ImageEmbeddings::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { 
+#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + std::shared_ptr -ImageEmbeddings::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +ImageEmbeddings::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); @@ -45,7 +79,33 @@ ImageEmbeddings::generate(std::string imageSource) { "is correct."); } - return BaseEmbeddings::postprocess(forwardResult); + auto forwardResultTensor = forwardResult->at(0).toTensor(); + return std::make_shared( + forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes()); +} + +std::shared_ptr +ImageEmbeddings::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB); +} + +std::shared_ptr +ImageEmbeddings::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + cv::Mat frame = extractFromFrame(runtime, frameData); + return runInference(frame); +} + +std::shared_ptr +ImageEmbeddings::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + + 
return runInference(image); } } // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h index 7e114e939..ec11ee5c6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h @@ -2,25 +2,41 @@ #include #include +#include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include namespace rnexecutorch { namespace models::embeddings { using executorch::extension::TensorPtr; using executorch::runtime::EValue; -class ImageEmbeddings final : public BaseEmbeddings { +class ImageEmbeddings final : public VisionModel { public: ImageEmbeddings(const std::string &modelSource, std::shared_ptr callInvoker); + [[nodiscard( "Registered non-void function")]] std::shared_ptr - generate(std::string imageSource); + generateFromString(std::string imageSource); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard( + "Registered non-void function")]] std::shared_ptr + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: + std::shared_ptr runInference(cv::Mat image); + cv::Size modelImageSize{0, 0}; }; } // namespace models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp index 141ec430e..3a2bfd0cf 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp +++ 
b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.cpp @@ -1,7 +1,4 @@ #include "BaseImageSegmentation.h" -#include "jsi/jsi.h" - -#include #include #include @@ -14,14 +11,14 @@ namespace rnexecutorch::models::image_segmentation { BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); } BaseImageSegmentation::BaseImageSegmentation( const std::string &modelSource, std::vector normMean, std::vector normStd, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { initModelImageSize(); if (normMean.size() == 3) { normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); @@ -55,7 +52,43 @@ void BaseImageSegmentation::initModelImageSize() { numModelPixels = modelImageSize.area(); } -TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, +cv::Mat BaseImageSegmentation::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + cv::Mat processed; + if (rgb.size() != modelImageSize) { + cv::resize(rgb, processed, modelImageSize); + } else { + processed = rgb; + } + + if (normMean_.has_value() && normStd_.has_value()) { + processed.convertTo(processed, CV_32FC3, 1.0 / 255.0); + processed -= *normMean_; + processed /= *normStd_; + } + + return processed; +} + +TensorPtr +BaseImageSegmentation::preprocessFromString(const std::string 
&imageSource, cv::Size &originalSize) { auto [inputTensor, origSize] = image_processing::readImageToTensor( imageSource, getAllInputShapes()[0], false, normMean_, normStd_); @@ -63,12 +96,35 @@ TensorPtr BaseImageSegmentation::preprocess(const std::string &imageSource, return inputTensor; } -std::shared_ptr BaseImageSegmentation::generate( +SegmentationResult BaseImageSegmentation::runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); + + auto forwardResult = BaseModel::forward(inputTensor); + + if (!forwardResult.ok()) { + throw RnExecutorchError(forwardResult.error(), + "The model's forward function did not succeed. " + "Ensure the model input is correct."); + } + + return postprocess(forwardResult->at(0).toTensor(), originalSize, allClasses, + classesOfInterest, resize); +} + +SegmentationResult BaseImageSegmentation::generateFromString( std::string imageSource, std::vector allClasses, std::set> classesOfInterest, bool resize) { cv::Size originalSize; - auto inputTensor = preprocess(imageSource, originalSize); + auto inputTensor = preprocessFromString(imageSource, originalSize); auto forwardResult = BaseModel::forward(inputTensor); @@ -82,7 +138,29 @@ std::shared_ptr BaseImageSegmentation::generate( classesOfInterest, resize); } -std::shared_ptr BaseImageSegmentation::postprocess( +SegmentationResult BaseImageSegmentation::generateFromFrame( + jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, bool resize) { + // extractFromFrame rotates landscape frames 90Β° CW automatically. 
+ cv::Mat frame = extractFromFrame(runtime, frameData); + cv::Size originalSize = frame.size(); + + return runInference(frame, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::generateFromPixels( + JSTensorViewIn pixelData, std::vector allClasses, + std::set> classesOfInterest, bool resize) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize, std::move(allClasses), + std::move(classesOfInterest), resize); +} + +SegmentationResult BaseImageSegmentation::postprocess( const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, bool resize) { @@ -167,8 +245,8 @@ std::shared_ptr BaseImageSegmentation::postprocess( } // Filter classes of interest - auto buffersToReturn = std::make_shared>>(); + auto buffersToReturn = std::make_shared< + std::unordered_map>>(); for (std::size_t cl = 0; cl < resultClasses.size(); ++cl) { if (cl < allClasses.size() && classesOfInterest.contains(allClasses[cl])) { (*buffersToReturn)[allClasses[cl]] = resultClasses[cl]; @@ -191,48 +269,7 @@ std::shared_ptr BaseImageSegmentation::postprocess( } } - return populateDictionary(argmax, buffersToReturn); -} - -std::shared_ptr BaseImageSegmentation::populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput) { - auto promisePtr = std::make_shared>(); - std::future doneFuture = promisePtr->get_future(); - - std::shared_ptr dictPtr = nullptr; - callInvoker->invokeAsync( - [argmax, classesToOutput, &dictPtr, promisePtr](jsi::Runtime &runtime) { - dictPtr = std::make_shared(runtime); - auto argmaxArrayBuffer = jsi::ArrayBuffer(runtime, argmax); - - auto int32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Int32Array"); - auto int32Array = - int32ArrayCtor.callAsConstructor(runtime, argmaxArrayBuffer) - .getObject(runtime); - dictPtr->setProperty(runtime, "ARGMAX", 
int32Array); - - for (auto &[classLabel, owningBuffer] : *classesToOutput) { - auto classArrayBuffer = jsi::ArrayBuffer(runtime, owningBuffer); - - auto float32ArrayCtor = - runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, classArrayBuffer) - .getObject(runtime); - - dictPtr->setProperty( - runtime, jsi::String::createFromAscii(runtime, classLabel.data()), - float32Array); - } - promisePtr->set_value(); - }); - - doneFuture.wait(); - return dictPtr; + return SegmentationResult{argmax, buffersToReturn}; } } // namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h index f46f41d69..49daf5ee5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/BaseImageSegmentation.h @@ -8,7 +8,8 @@ #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" #include -#include +#include +#include namespace rnexecutorch { namespace models::image_segmentation { @@ -17,7 +18,7 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class BaseImageSegmentation : public BaseModel { +class BaseImageSegmentation : public VisionModel { public: BaseImageSegmentation(const std::string &modelSource, std::shared_ptr callInvoker); @@ -26,14 +27,28 @@ class BaseImageSegmentation : public BaseModel { std::vector normMean, std::vector normStd, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::shared_ptr - generate(std::string imageSource, std::vector allClasses, - std::set> classesOfInterest, bool resize); + [[nodiscard("Registered non-void function")]] SegmentationResult + 
generateFromString(std::string imageSource, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); + + [[nodiscard("Registered non-void function")]] SegmentationResult + generateFromPixels(JSTensorViewIn pixelData, + std::vector allClasses, + std::set> classesOfInterest, + bool resize); protected: - virtual TensorPtr preprocess(const std::string &imageSource, - cv::Size &originalSize); - virtual std::shared_ptr + cv::Mat preprocessFrame(const cv::Mat &frame) const override; + + virtual SegmentationResult postprocess(const Tensor &tensor, cv::Size originalSize, std::vector &allClasses, std::set> &classesOfInterest, @@ -44,14 +59,15 @@ class BaseImageSegmentation : public BaseModel { std::optional normMean_; std::optional normStd_; - std::shared_ptr populateDictionary( - std::shared_ptr argmax, - std::shared_ptr>> - classesToOutput); - private: void initModelImageSize(); + + SegmentationResult runInference( + cv::Mat image, cv::Size originalSize, std::vector allClasses, + std::set> classesOfInterest, bool resize); + + TensorPtr preprocessFromString(const std::string &imageSource, + cv::Size &originalSize); }; } // namespace models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h new file mode 100644 index 000000000..b5d6f5067 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/Types.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include +#include + +namespace rnexecutorch::models::image_segmentation { + +struct SegmentationResult { + std::shared_ptr argmax; + std::shared_ptr< + std::unordered_map>> + classBuffers; +}; + +} // 
namespace rnexecutorch::models::image_segmentation diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index 2670cf9dd..7f7216b02 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -5,7 +5,6 @@ #include #include #include -#include namespace rnexecutorch::models::object_detection { @@ -144,9 +143,7 @@ std::vector ObjectDetection::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold) { - auto frameObj = frameData.asObject(runtime); - cv::Mat frame = rnexecutorch::utils::extractFrame(runtime, frameObj); - + cv::Mat frame = extractFromFrame(runtime, frameData); return runInference(frame, detectionThreshold); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp index a521b4e8b..50834a1b8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace rnexecutorch::models::ocr { OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, @@ -12,12 +13,8 @@ OCR::OCR(const std::string &detectorSource, const std::string &recognizerSource, : detector(detectorSource, callInvoker), recognitionHandler(recognizerSource, symbols, callInvoker) {} -std::vector OCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector OCR::runInference(cv::Mat image) { + 
std::scoped_lock lock(inference_mutex_); /* 1. Detection process returns the list of bounding boxes containing areas @@ -43,6 +40,63 @@ std::vector OCR::generate(std::string input) { return result; } +std::vector OCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +OCR::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +OCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat rgbImage(height, width, 
CV_8UC3, dataPtr); + cv::Mat image; + cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t OCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognitionHandler.getMemoryLowerBound(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h index d84ba903f..719cb957c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.h @@ -1,9 +1,11 @@ #pragma once +#include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -28,13 +30,20 @@ class OCR final { const std::string &recognizerSource, const std::string &symbols, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + Detector detector; RecognitionHandler recognitionHandler; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp index 3b9c0187b..c334f5d84 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp @@ -6,6 +6,7 @@ #include #include #include 
+#include namespace rnexecutorch::models::style_transfer { using namespace facebook; @@ -13,7 +14,7 @@ using executorch::extension::TensorPtr; StyleTransfer::StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) { + : VisionModel(modelSource, callInvoker) { auto inputShapes = getAllInputShapes(); if (inputShapes.size() == 0) { throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, @@ -33,17 +34,67 @@ StyleTransfer::StyleTransfer(const std::string &modelSource, modelInputShape[modelInputShape.size() - 2]); } -std::string StyleTransfer::postprocess(const Tensor &tensor, - cv::Size originalSize) { +cv::Mat StyleTransfer::preprocessFrame(const cv::Mat &frame) const { + cv::Mat rgb; + + if (frame.channels() == 4) { +#ifdef __APPLE__ + cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB); +#else + cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB); +#endif + } else if (frame.channels() == 3) { + rgb = frame; + } else { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Unsupported frame format: %d channels", frame.channels()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (rgb.size() != modelImageSize) { + cv::Mat resized; + cv::resize(rgb, resized, modelImageSize); + return resized; + } + + return rgb; +} + +PixelDataResult StyleTransfer::postprocess(const Tensor &tensor, + cv::Size outputSize) { + // Convert tensor output (at modelImageSize) to CV_8UC3 BGR mat cv::Mat mat = image_processing::getMatrixFromTensor(modelImageSize, tensor); - cv::resize(mat, mat, originalSize); - return image_processing::saveToTempFile(mat); + // Resize only if requested output differs from model output size + if (mat.size() != outputSize) { + cv::resize(mat, mat, outputSize); + } + + // Convert BGR -> RGBA so JS can pass the buffer directly to Skia + cv::Mat rgba; + cv::cvtColor(mat, rgba, cv::COLOR_BGR2RGBA); + + std::size_t dataSize = + 
static_cast(outputSize.width) * outputSize.height * 4; + auto pixelBuffer = std::make_shared(rgba.data, dataSize); + log(LOG_LEVEL::Debug, + "[StyleTransfer] postprocess: RGBA buffer size:", dataSize, + "w:", outputSize.width, "h:", outputSize.height); + + return PixelDataResult{pixelBuffer, outputSize.width, outputSize.height}; } -std::string StyleTransfer::generate(std::string imageSource) { - auto [inputTensor, originalSize] = - image_processing::readImageToTensor(imageSource, getAllInputShapes()[0]); +PixelDataResult StyleTransfer::runInference(cv::Mat image, + cv::Size originalSize) { + std::scoped_lock lock(inference_mutex_); + + cv::Mat preprocessed = preprocessFrame(image); + + const std::vector tensorDims = getAllInputShapes()[0]; + auto inputTensor = + image_processing::getTensorFromMatrix(tensorDims, preprocessed); auto forwardResult = BaseModel::forward(inputTensor); if (!forwardResult.ok()) { @@ -55,4 +106,31 @@ std::string StyleTransfer::generate(std::string imageSource) { return postprocess(forwardResult->at(0).toTensor(), originalSize); } +PixelDataResult StyleTransfer::generateFromString(std::string imageSource) { + cv::Mat imageBGR = image_processing::readImage(imageSource); + cv::Size originalSize = imageBGR.size(); + + cv::Mat imageRGB; + cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); + + return runInference(imageRGB, originalSize); +} + +PixelDataResult StyleTransfer::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + // extractFromFrame rotates landscape frames 90Β° CW automatically. + cv::Mat frame = extractFromFrame(runtime, frameData); + + // For real-time frame processing, output at modelImageSize to avoid + // allocating large buffers (e.g. 1280x720x3 ~2.7MB) on every frame. 
+ return runInference(frame, modelImageSize); +} + +PixelDataResult StyleTransfer::generateFromPixels(JSTensorViewIn pixelData) { + cv::Mat image = extractFromPixels(pixelData); + cv::Size originalSize = image.size(); + + return runInference(image, originalSize); +} + } // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h index 73744c4d8..99f9f4b3a 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h @@ -9,7 +9,9 @@ #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" -#include +#include +#include +#include namespace rnexecutorch { namespace models::style_transfer { @@ -17,15 +19,30 @@ using namespace facebook; using executorch::aten::Tensor; using executorch::extension::TensorPtr; -class StyleTransfer : public BaseModel { +class StyleTransfer : public VisionModel { public: StyleTransfer(const std::string &modelSource, std::shared_ptr callInvoker); - [[nodiscard("Registered non-void function")]] std::string - generate(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromString(std::string imageSource); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + + [[nodiscard("Registered non-void function")]] PixelDataResult + generateFromPixels(JSTensorViewIn pixelData); + +protected: + cv::Mat preprocessFrame(const cv::Mat &frame) const override; private: - std::string postprocess(const Tensor &tensor, cv::Size originalSize); + // outputSize: size to resize the styled output to before returning. + // Pass modelImageSize for real-time frame processing (avoids large allocs). 
+ // Pass the source image size for generateFromString/generateFromPixels. + PixelDataResult runInference(cv::Mat image, cv::Size outputSize); + + PixelDataResult postprocess(const Tensor &tensor, cv::Size outputSize); cv::Size modelImageSize{0, 0}; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h new file mode 100644 index 000000000..f677183a6 --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/Types.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace rnexecutorch::models::style_transfer { + +struct PixelDataResult { + std::shared_ptr dataPtr; + int width; + int height; +}; + +} // namespace rnexecutorch::models::style_transfer diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp index 0f75d2015..71ea737f8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.cpp @@ -1,10 +1,12 @@ #include "VerticalOCR.h" #include #include +#include #include #include #include #include +#include #include namespace rnexecutorch::models::ocr { @@ -16,12 +18,9 @@ VerticalOCR::VerticalOCR(const std::string &detectorSource, converter(symbols), independentCharacters(independentChars), callInvoker(invoker) {} -std::vector VerticalOCR::generate(std::string input) { - cv::Mat image = image_processing::readImage(input); - if (image.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, - "Failed to load image from path: " + input); - } +std::vector VerticalOCR::runInference(cv::Mat image) { + std::scoped_lock lock(inference_mutex_); + // 1. 
Large Detector std::vector largeBoxes = detector.generate(image, constants::kLargeDetectorWidth); @@ -44,6 +43,65 @@ std::vector VerticalOCR::generate(std::string input) { return predictions; } +std::vector +VerticalOCR::generateFromString(std::string input) { + cv::Mat image = image_processing::readImage(input); + if (image.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::FileReadFailed, + "Failed to load image from path: " + input); + } + return runInference(image); +} + +std::vector +VerticalOCR::generateFromFrame(jsi::Runtime &runtime, + const jsi::Value &frameData) { + auto frameObj = frameData.asObject(runtime); + cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); + // extractFrame returns RGB; convert to BGR for consistency with readImage + cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); + return runInference(frame); +} + +std::vector +VerticalOCR::generateFromPixels(JSTensorViewIn pixelData) { + if (pixelData.sizes.size() != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: sizes must have 3 elements " + "[height, width, channels], got %zu", + pixelData.sizes.size()); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + int32_t height = pixelData.sizes[0]; + int32_t width = pixelData.sizes[1]; + int32_t channels = pixelData.sizes[2]; + + if (channels != 3) { + char errorMessage[100]; + std::snprintf(errorMessage, sizeof(errorMessage), + "Invalid pixel data: expected 3 channels (RGB), got %d", + channels); + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + errorMessage); + } + + if (pixelData.scalarType != executorch::aten::ScalarType::Byte) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidUserInput, + "Invalid pixel data: scalarType must be BYTE (Uint8Array)"); + } + + uint8_t *dataPtr = static_cast(pixelData.dataPtr); + // Input is RGB from JS; convert to BGR for consistency with readImage + cv::Mat 
rgbImage(height, width, CV_8UC3, dataPtr); + cv::Mat image; + cv::cvtColor(rgbImage, image, cv::COLOR_RGB2BGR); + return runInference(image); +} + std::size_t VerticalOCR::getMemoryLowerBound() const noexcept { return detector.getMemoryLowerBound() + recognizer.getMemoryLowerBound(); } diff --git a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h index e97fb9034..4016e2813 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/vertical_ocr/VerticalOCR.h @@ -1,12 +1,14 @@ #pragma once #include +#include #include #include #include #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include #include @@ -48,11 +50,17 @@ class VerticalOCR final { bool indpendentCharacters, std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector - generate(std::string input); + generateFromString(std::string input); + [[nodiscard("Registered non-void function")]] std::vector + generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData); + [[nodiscard("Registered non-void function")]] std::vector + generateFromPixels(JSTensorViewIn pixelData); std::size_t getMemoryLowerBound() const noexcept; void unload() noexcept; private: + std::vector runInference(cv::Mat image); + std::pair _handleIndependentCharacters( const types::DetectorBBox &box, const cv::Mat &originalImage, const std::vector &characterBoxes, @@ -75,6 +83,7 @@ class VerticalOCR final { CTCLabelConverter converter; bool independentCharacters; std::shared_ptr callInvoker; + mutable std::mutex inference_mutex_; }; } // namespace models::ocr diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp 
b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp index 10aa663a4..b64f167c9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ClassificationTest.cpp @@ -28,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -42,37 +42,37 @@ INSTANTIATE_TYPED_TEST_SUITE_P(Classification, CommonModelTest, // ============================================================================ TEST(ClassificationGenerateTests, InvalidImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(ClassificationGenerateTests, EmptyImagePathThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ClassificationGenerateTests, MalformedURIThrows) { Classification model(kValidClassificationModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ClassificationGenerateTests, ValidImageReturnsResults) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); EXPECT_FALSE(results.empty()); } TEST(ClassificationGenerateTests, ResultsHaveCorrectSize) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = 
model.generateFromString(kValidTestImagePath); auto expectedNumClasses = constants::kImagenet1kV1Labels.size(); EXPECT_EQ(results.size(), expectedNumClasses); } TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float sum = 0.0f; for (const auto &[label, prob] : results) { @@ -85,7 +85,7 @@ TEST(ClassificationGenerateTests, ResultsContainValidProbabilities) { TEST(ClassificationGenerateTests, TopPredictionHasReasonableConfidence) { Classification model(kValidClassificationModelPath, nullptr); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); float maxProb = 0.0f; for (const auto &[label, prob] : results) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp index 3a2374695..ba76939a8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp @@ -29,7 +29,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -43,31 +43,31 @@ INSTANTIATE_TYPED_TEST_SUITE_P(ImageEmbeddings, CommonModelTest, // ============================================================================ TEST(ImageEmbeddingsGenerateTests, InvalidImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } 
TEST(ImageEmbeddingsGenerateTests, EmptyImagePathThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, MalformedURIThrows) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(ImageEmbeddingsGenerateTests, ValidImageReturnsResults) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); EXPECT_NE(result, nullptr); EXPECT_GT(result->size(), 0u); } TEST(ImageEmbeddingsGenerateTests, ResultsHaveCorrectSize) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); size_t numFloats = result->size() / sizeof(float); constexpr size_t kClipEmbeddingDimensions = 512; EXPECT_EQ(numFloats, kClipEmbeddingDimensions); @@ -77,7 +77,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { // TODO: Investigate the source of the issue; GTEST_SKIP() << "Expected to fail in emulator environments"; ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); @@ -92,7 +92,7 @@ TEST(ImageEmbeddingsGenerateTests, ResultsAreNormalized) { TEST(ImageEmbeddingsGenerateTests, ResultsContainValidValues) { ImageEmbeddings model(kValidImageEmbeddingsModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); + auto result = 
model.generateFromString(kValidTestImagePath); const float *data = reinterpret_cast(result->data()); size_t numFloats = result->size() / sizeof(float); diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp index 428fb5afb..6f6f708be 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/OCRTest.cpp @@ -41,7 +41,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -67,27 +67,27 @@ TEST(OCRCtorTests, EmptySymbolsThrows) { TEST(OCRGenerateTests, InvalidImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(OCRGenerateTests, EmptyImagePathThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(OCRGenerateTests, MalformedURIThrows) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(OCRGenerateTests, ValidImageReturnsResults) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); // May or may not have 
detections depending on image content EXPECT_GE(results.size(), 0u); } @@ -95,7 +95,7 @@ TEST(OCRGenerateTests, ValidImageReturnsResults) { TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { // Each bbox should have 4 points @@ -110,7 +110,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidBoundingBoxes) { TEST(OCRGenerateTests, DetectionsHaveValidScores) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -121,7 +121,7 @@ TEST(OCRGenerateTests, DetectionsHaveValidScores) { TEST(OCRGenerateTests, DetectionsHaveNonEmptyText) { OCR model(kValidDetectorPath, kValidRecognizerPath, ENGLISH_SYMBOLS, createMockCallInvoker()); - auto results = model.generate(kValidTestImagePath); + auto results = model.generateFromString(kValidTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp index 3e6951617..5fbf798b6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp @@ -1,6 +1,4 @@ #include "BaseModelTests.h" -#include "utils/TestUtils.h" -#include #include #include #include @@ -30,7 +28,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - 
(void)model.generate(kValidTestImagePath); + (void)model.generateFromString(kValidTestImagePath); } }; } // namespace model_tests @@ -44,51 +42,34 @@ INSTANTIATE_TYPED_TEST_SUITE_P(StyleTransfer, CommonModelTest, // ============================================================================ TEST(StyleTransferGenerateTests, InvalidImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(StyleTransferGenerateTests, EmptyImagePathThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(StyleTransferGenerateTests, MalformedURIThrows) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } -TEST(StyleTransferGenerateTests, ValidImageReturnsFilePath) { +TEST(StyleTransferGenerateTests, ValidImageReturnsNonNull) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - EXPECT_FALSE(result.empty()); -} - -TEST(StyleTransferGenerateTests, ResultIsValidFilePath) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - EXPECT_TRUE(std::filesystem::exists(result)); -} - -TEST(StyleTransferGenerateTests, ResultFileHasContent) { - StyleTransfer model(kValidStyleTransferModelPath, nullptr); - auto result = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result); - auto fileSize = std::filesystem::file_size(result); - EXPECT_GT(fileSize, 0u); + auto result = model.generateFromString(kValidTestImagePath); + 
EXPECT_NE(result, nullptr); } TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); - EXPECT_NO_THROW((void)model.generate(kValidTestImagePath)); - auto result1 = model.generate(kValidTestImagePath); - auto result2 = model.generate(kValidTestImagePath); - test_utils::trimFilePrefix(result1); - test_utils::trimFilePrefix(result2); - EXPECT_TRUE(std::filesystem::exists(result1)); - EXPECT_TRUE(std::filesystem::exists(result2)); + EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath)); + auto result1 = model.generateFromString(kValidTestImagePath); + auto result2 = model.generateFromString(kValidTestImagePath); + EXPECT_NE(result1, nullptr); + EXPECT_NE(result2, nullptr); } TEST(StyleTransferInheritedTests, GetInputShapeWorks) { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp index 7b1010a81..56f18d862 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp @@ -43,7 +43,7 @@ template <> struct ModelTraits { } static void callGenerate(ModelType &model) { - (void)model.generate(kValidVerticalTestImagePath); + (void)model.generateFromString(kValidVerticalTestImagePath); } }; } // namespace model_tests @@ -85,34 +85,34 @@ TEST(VerticalOCRCtorTests, IndependentCharsFalseDoesntThrow) { TEST(VerticalOCRGenerateTests, IndependentCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsEmptyImagePathThrows) { 
VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, IndependentCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -126,7 +126,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -137,7 +137,7 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveValidScores) { 
TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, true, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -148,34 +148,34 @@ TEST(VerticalOCRGenerateTests, IndependentCharsDetectionsHaveNonEmptyText) { TEST(VerticalOCRGenerateTests, JointCharsInvalidImageThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("nonexistent_image.jpg"), + EXPECT_THROW((void)model.generateFromString("nonexistent_image.jpg"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsEmptyImagePathThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate(""), RnExecutorchError); + EXPECT_THROW((void)model.generateFromString(""), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsMalformedURIThrows) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_THROW((void)model.generate("not_a_valid_uri://bad"), + EXPECT_THROW((void)model.generateFromString("not_a_valid_uri://bad"), RnExecutorchError); } TEST(VerticalOCRGenerateTests, JointCharsValidImageReturnsResults) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); EXPECT_GE(results.size(), 0u); } TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { VerticalOCR 
model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_EQ(detection.bbox.size(), 4u); @@ -189,7 +189,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidBBoxes) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_GE(detection.score, 0.0f); @@ -200,7 +200,7 @@ TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveValidScores) { TEST(VerticalOCRGenerateTests, JointCharsDetectionsHaveNonEmptyText) { VerticalOCR model(kValidVerticalDetectorPath, kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - auto results = model.generate(kValidVerticalTestImagePath); + auto results = model.generateFromString(kValidVerticalTestImagePath); for (const auto &detection : results) { EXPECT_FALSE(detection.text.empty()); @@ -216,8 +216,10 @@ TEST(VerticalOCRStrategyTests, BothStrategiesRunSuccessfully) { kValidVerticalRecognizerPath, ENGLISH_SYMBOLS, false, createMockCallInvoker()); - EXPECT_NO_THROW((void)independentModel.generate(kValidVerticalTestImagePath)); - EXPECT_NO_THROW((void)jointModel.generate(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)independentModel.generateFromString(kValidVerticalTestImagePath)); + EXPECT_NO_THROW( + (void)jointModel.generateFromString(kValidVerticalTestImagePath)); } TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { @@ -229,8 +231,9 @@ TEST(VerticalOCRStrategyTests, BothStrategiesReturnValidResults) { 
createMockCallInvoker()); auto independentResults = - independentModel.generate(kValidVerticalTestImagePath); - auto jointResults = jointModel.generate(kValidVerticalTestImagePath); + independentModel.generateFromString(kValidVerticalTestImagePath); + auto jointResults = + jointModel.generateFromString(kValidVerticalTestImagePath); // Both should return some results (or none if no text detected) EXPECT_GE(independentResults.size(), 0u); diff --git a/packages/react-native-executorch/src/controllers/BaseOCRController.ts b/packages/react-native-executorch/src/controllers/BaseOCRController.ts index c124dadce..b6e5c3a5b 100644 --- a/packages/react-native-executorch/src/controllers/BaseOCRController.ts +++ b/packages/react-native-executorch/src/controllers/BaseOCRController.ts @@ -2,10 +2,24 @@ import { Logger } from '../common/Logger'; import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchErrorCode } from '../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../errors/errorUtils'; -import { ResourceSource } from '../types/common'; +import { Frame, PixelData, ResourceSource, ScalarType } from '../types/common'; import { OCRLanguage, OCRDetection } from '../types/ocr'; import { ResourceFetcher } from '../utils/ResourceFetcher'; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + input.dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray(input.sizes) && + input.sizes.length === 3 && + 'scalarType' in input && + input.scalarType === ScalarType.BYTE + ); +} + export abstract class BaseOCRController { protected nativeModule: any; public isReady: boolean = false; @@ -87,7 +101,34 @@ export abstract class BaseOCRController { } }; - public forward = async (imageSource: string): Promise => { + get runOnFrame(): ((frame: Frame) => OCRDetection[]) | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + 
const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + + return (frame: any): OCRDetection[] => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame(frameData); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + + public forward = async ( + input: string | PixelData + ): Promise => { if (!this.isReady) { throw new RnExecutorchError( RnExecutorchErrorCode.ModuleNotLoaded, @@ -104,7 +145,17 @@ export abstract class BaseOCRController { try { this.isGenerating = true; this.isGeneratingCallback(this.isGenerating); - return await this.nativeModule.generate(imageSource); + + if (typeof input === 'string') { + return await this.nativeModule.generateFromString(input); + } else if (isPixelData(input)) { + return await this.nativeModule.generateFromPixels(input); + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
+ ); + } } catch (e) { throw parseUnknownError(e); } finally { diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts index 88831f9aa..55b8d8500 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts @@ -9,6 +9,7 @@ import { ModelNameOf, ModelSources, } from '../../types/imageSegmentation'; +import { Frame } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils'; @@ -41,6 +42,14 @@ export const useImageSegmentation = ({ const [instance, setInstance] = useState > | null>(null); + const [runOnFrame, setRunOnFrame] = useState< + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null + >(null); useEffect(() => { if (preventLoad) return; @@ -62,6 +71,10 @@ export const useImageSegmentation = ({ if (isMounted) { setInstance(currentInstance); setIsReady(true); + const worklet = currentInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } } } catch (err) { if (isMounted) setError(parseUnknownError(err)); @@ -70,6 +83,8 @@ export const useImageSegmentation = ({ return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); currentInstance?.delete(); }; @@ -111,5 +126,6 @@ export const useImageSegmentation = ({ isGenerating, downloadProgress, forward, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts index 6b2868834..967f750c6 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useOCR.ts @@ -1,5 +1,6 @@ import { 
useEffect, useState } from 'react'; -import { OCRProps, OCRType } from '../../types/ocr'; +import { OCRProps, OCRType, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { OCRController } from '../../controllers/OCRController'; import { RnExecutorchError } from '../../errors/errorUtils'; @@ -15,6 +16,9 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -35,9 +39,16 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { model.language, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -54,5 +65,6 @@ export const useOCR = ({ model, preventLoad = false }: OCRProps): OCRType => { isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts index eb9d289eb..bd479aea2 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useVerticalOCR.ts @@ -1,5 +1,6 @@ import { useEffect, useState } from 'react'; -import { OCRType, VerticalOCRProps } from '../../types/ocr'; +import { OCRType, VerticalOCRProps, OCRDetection } from '../../types/ocr'; +import { Frame } from '../../types/common'; import { VerticalOCRController } from '../../controllers/VerticalOCRController'; import { RnExecutorchError } from 
'../../errors/errorUtils'; @@ -19,6 +20,9 @@ export const useVerticalOCR = ({ const [isReady, setIsReady] = useState(false); const [isGenerating, setIsGenerating] = useState(false); const [downloadProgress, setDownloadProgress] = useState(0); + const [runOnFrame, setRunOnFrame] = useState< + ((frame: Frame) => OCRDetection[]) | null + >(null); const [controllerInstance] = useState( () => @@ -40,9 +44,16 @@ export const useVerticalOCR = ({ independentCharacters, setDownloadProgress ); + + const worklet = controllerInstance.runOnFrame; + if (worklet) { + setRunOnFrame(() => worklet); + } })(); return () => { + setRunOnFrame(null); + setIsReady(false); controllerInstance.delete(); }; }, [ @@ -60,5 +71,6 @@ export const useVerticalOCR = ({ isGenerating, forward: controllerInstance.forward, downloadProgress, + runOnFrame, }; }; diff --git a/packages/react-native-executorch/src/hooks/useModule.ts b/packages/react-native-executorch/src/hooks/useModule.ts index 82a0bb72f..9838c4aa0 100644 --- a/packages/react-native-executorch/src/hooks/useModule.ts +++ b/packages/react-native-executorch/src/hooks/useModule.ts @@ -62,6 +62,8 @@ export const useModule = < return () => { isMounted = false; + setIsReady(false); + setRunOnFrame(null); moduleInstance.delete(); }; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts index 45b7e2b39..d0735ae26 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ClassificationModule.ts @@ -1,16 +1,18 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; -import { BaseModule } from '../BaseModule'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { 
parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for image classification tasks. * * @category Typescript API */ -export class ClassificationModule extends BaseModule { +export class ClassificationModule extends VisionModule<{ + [category: string]: number; +}> { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +44,9 @@ export class ClassificationModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * - * @param imageSource - The image source to be classified. - * @returns The classification result. - */ - async forward(imageSource: string): Promise<{ [category: string]: number }> { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return await this.nativeModule.generate(imageSource); + async forward( + input: string | PixelData + ): Promise<{ [category: string]: number }> { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts index 3e62f450d..6fb78c4cc 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageEmbeddingsModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for generating image embeddings from input images. * * @category Typescript API */ -export class ImageEmbeddingsModule extends BaseModule { +export class ImageEmbeddingsModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * @@ -41,18 +41,8 @@ export class ImageEmbeddingsModule extends BaseModule { } } - /** - * Executes the model's forward pass. Returns an embedding array for a given sentence. - * - * @param imageSource - The image source (URI/URL) to image that will be embedded. - * @returns A Float32Array containing the image embeddings. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return new Float32Array(await this.nativeModule.generate(imageSource)); + async forward(input: string | PixelData): Promise { + const result = await super.forward(input); + return new Float32Array(result as unknown as ArrayBuffer); } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts index f2de6edd7..b2f7c908f 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts @@ -1,5 +1,11 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource, LabelEnum } from '../../types/common'; +import { + ResourceSource, + LabelEnum, + Frame, + PixelData, + ScalarType, +} from '../../types/common'; import { DeeplabLabel, ModelNameOf, @@ -47,6 +53,20 @@ export type SegmentationLabels = type ResolveLabels = T extends SegmentationModelName ? SegmentationLabels : T; +function isPixelData(input: unknown): input is PixelData { + return ( + typeof input === 'object' && + input !== null && + 'dataPtr' in input && + (input as any).dataPtr instanceof Uint8Array && + 'sizes' in input && + Array.isArray((input as any).sizes) && + (input as any).sizes.length === 3 && + 'scalarType' in input && + (input as any).scalarType === ScalarType.BYTE + ); +} + /** * Generic image segmentation module with type-safe label maps. * Use a model name (e.g. `'deeplab-v3'`) as the generic parameter for built-in models, @@ -75,6 +95,75 @@ export class ImageSegmentationModule< // TODO: figure it out so we can delete this (we need this because of basemodule inheritance) override async load() {} + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded. + * + * @example + * ```typescript + * const [runOnFrame, setRunOnFrame] = useState(null); + * setRunOnFrame(() => segmentation.runOnFrame); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame, [], true); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + */ + get runOnFrame(): + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => any) + | null { + if (!this.nativeModule?.generateFromFrame) { + return null; + } + + const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; + const allClassNames = this.allClassNames; + + return ( + frame: any, + classesOfInterest: string[] = [], + resizeToInput: boolean = true + ): any => { + 'worklet'; + + let nativeBuffer: any = null; + try { + nativeBuffer = frame.getNativeBuffer(); + const frameData = { + nativeBuffer: nativeBuffer.pointer, + }; + return nativeGenerateFromFrame( + frameData, + allClassNames, + classesOfInterest, + resizeToInput + ); + } finally { + if (nativeBuffer?.release) { + nativeBuffer.release(); + } + } + }; + } + /** * Creates a segmentation instance for a built-in model. * The config object is discriminated by `modelName` β€” each model can require different fields. @@ -167,14 +256,20 @@ export class ImageSegmentationModule< /** * Executes the model's forward pass to perform semantic segmentation on the provided image. * - * @param imageSource - A string representing the image source (e.g., a file path, URI, or Base64-encoded string). + * Supports two input types: + * 1. 
**String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional list of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` key mapped to an `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. * @throws {RnExecutorchError} If the model is not loaded. */ async forward>( - imageSource: string, + input: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ): Promise & Record> { @@ -189,14 +284,29 @@ export class ImageSegmentationModule< String(label) ); - const nativeResult = await this.nativeModule.generate( - imageSource, - this.allClassNames, - classesOfInterestNames, - resizeToInput - ); - - return nativeResult as Record<'ARGMAX', Int32Array> & - Record; + if (typeof input === 'string') { + const nativeResult = await this.nativeModule.generateFromString( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else if (isPixelData(input)) { + const nativeResult = await this.nativeModule.generateFromPixels( + input, + this.allClassNames, + classesOfInterestNames, + resizeToInput + ); + return nativeResult as Record<'ARGMAX', Int32Array> & + Record; + } else { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + 'Invalid input: expected string path or PixelData object. 
For VisionCamera frames, use runOnFrame instead.' + ); + } } } diff --git a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts index 90e5242de..fc83f7bc7 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/StyleTransferModule.ts @@ -1,16 +1,16 @@ import { ResourceFetcher } from '../../utils/ResourceFetcher'; -import { ResourceSource } from '../../types/common'; +import { ResourceSource, PixelData } from '../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; -import { BaseModule } from '../BaseModule'; import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** * Module for style transfer tasks. * * @category Typescript API */ -export class StyleTransferModule extends BaseModule { +export class StyleTransferModule extends VisionModule { /** * Loads the model, where `modelSource` is a string that specifies the location of the model binary. * To track the download progress, supply a callback function `onDownloadProgressCallback`. @@ -42,18 +42,7 @@ export class StyleTransferModule extends BaseModule { } } - /** - * Executes the model's forward pass, where `imageSource` can be a fetchable resource or a Base64-encoded string. - * - * @param imageSource - The image source to be processed. - * @returns The stylized image as a Base64-encoded string. - */ - async forward(imageSource: string): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' 
- ); - return await this.nativeModule.generate(imageSource); + async forward(input: string | PixelData): Promise { + return super.forward(input); } } diff --git a/packages/react-native-executorch/src/types/classification.ts b/packages/react-native-executorch/src/types/classification.ts index 51152ec08..64a20ecf3 100644 --- a/packages/react-native-executorch/src/types/classification.ts +++ b/packages/react-native-executorch/src/types/classification.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useClassification` hook. @@ -43,9 +43,46 @@ export interface ClassificationType { /** * Executes the model's forward pass to classify the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be classified. - * @returns A Promise that resolves to the classification result (typically containing labels and confidence scores). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the classification result (labels and confidence scores). * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise<{ [category: string]: number }>; + forward: ( + input: string | PixelData + ) => Promise<{ [category: string]: number }>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. 
+ * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @example + * ```typescript + * const { runOnFrame, isReady } = useClassification({ model: MODEL }); + * + * const frameOutput = useFrameOutput({ + * onFrame(frame) { + * 'worklet'; + * if (!runOnFrame) return; + * const result = runOnFrame(frame); + * frame.dispose(); + * } + * }); + * ``` + * + * @param frame - VisionCamera Frame object + * @returns Object mapping class labels to confidence scores. + */ + runOnFrame: ((frame: Frame) => { [category: string]: number }) | null; } diff --git a/packages/react-native-executorch/src/types/imageEmbeddings.ts b/packages/react-native-executorch/src/types/imageEmbeddings.ts index 5dc23d66f..ccee4b4b1 100644 --- a/packages/react-native-executorch/src/types/imageEmbeddings.ts +++ b/packages/react-native-executorch/src/types/imageEmbeddings.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Props for the `useImageEmbeddings` hook. @@ -43,9 +43,30 @@ export interface ImageEmbeddingsType { /** * Executes the model's forward pass to generate embeddings (a feature vector) for the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) * @returns A Promise that resolves to a `Float32Array` containing the generated embedding vector. 
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns Float32Array containing the embedding vector for the frame. + */ + runOnFrame: ((frame: Frame) => Float32Array) | null; } diff --git a/packages/react-native-executorch/src/types/imageSegmentation.ts b/packages/react-native-executorch/src/types/imageSegmentation.ts index 6d79a801d..7e760487c 100644 --- a/packages/react-native-executorch/src/types/imageSegmentation.ts +++ b/packages/react-native-executorch/src/types/imageSegmentation.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { LabelEnum, Triple, ResourceSource } from './common'; +import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common'; /** * Configuration for a custom segmentation model. @@ -127,15 +127,44 @@ export interface ImageSegmentationType { /** * Executes the model's forward pass to perform semantic segmentation on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. 
+ * + * @param input - Image source (string or PixelData object) * @param classesOfInterest - An optional array of label keys indicating which per-class probability masks to include in the output. `ARGMAX` is always returned regardless. * @param resizeToInput - Whether to resize the output masks to the original input image dimensions. If `false`, returns the raw model output dimensions. Defaults to `true`. * @returns A Promise resolving to an object with an `'ARGMAX'` `Int32Array` of per-pixel class indices, and each requested class label mapped to a `Float32Array` of per-pixel probabilities. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. */ forward: ( - imageSource: string, + input: string | PixelData, classesOfInterest?: K[], resizeToInput?: boolean ) => Promise & Record>; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @param classesOfInterest - Labels for which to return per-class probability masks. + * @param resizeToInput - Whether to resize masks to original frame dimensions. Defaults to `true`. + * @returns Object with `ARGMAX` Int32Array and per-class Float32Array masks. 
+ */ + runOnFrame: + | (( + frame: Frame, + classesOfInterest?: string[], + resizeToInput?: boolean + ) => Record<'ARGMAX', Int32Array> & Record) + | null; } diff --git a/packages/react-native-executorch/src/types/ocr.ts b/packages/react-native-executorch/src/types/ocr.ts index 6ca2f4324..1b0640172 100644 --- a/packages/react-native-executorch/src/types/ocr.ts +++ b/packages/react-native-executorch/src/types/ocr.ts @@ -1,6 +1,6 @@ import { symbols } from '../constants/ocr/symbols'; import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { Frame, PixelData, ResourceSource } from './common'; /** * OCRDetection represents a single detected text instance in an image, @@ -104,11 +104,35 @@ export interface OCRType { /** * Executes the OCR pipeline (detection and recognition) on the provided image. - * @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed. - * @returns A Promise that resolves to the OCR results (typically containing the recognized text strings and their bounding boxes). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to the OCR results (recognized text and bounding boxes). * @throws {RnExecutorchError} If the models are not loaded or are currently processing another image. */ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. 
+ * + * **Note**: OCR is a two-stage pipeline (detection + recognition) and may not + * achieve real-time frame rates. Frames may be dropped if inference is still running. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns Array of OCRDetection results for the frame. + */ + runOnFrame: ((frame: Frame) => OCRDetection[]) | null; } /** diff --git a/packages/react-native-executorch/src/types/styleTransfer.ts b/packages/react-native-executorch/src/types/styleTransfer.ts index 162086722..3cf3d17fa 100644 --- a/packages/react-native-executorch/src/types/styleTransfer.ts +++ b/packages/react-native-executorch/src/types/styleTransfer.ts @@ -1,5 +1,5 @@ import { RnExecutorchError } from '../errors/errorUtils'; -import { ResourceSource } from './common'; +import { ResourceSource, PixelData, Frame } from './common'; /** * Configuration properties for the `useStyleTransfer` hook. @@ -43,9 +43,30 @@ export interface StyleTransferType { /** * Executes the model's forward pass to apply the specific artistic style to the provided image. - * @param imageSource - A string representing the input image source (e.g., a file path, URI, or base64 string) to be stylized. - * @returns A Promise that resolves to a string containing the stylized image (typically as a base64 string or a file URI). + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * + * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. + * + * @param input - Image source (string or PixelData object) + * @returns A Promise that resolves to `PixelData` containing the stylized image as raw RGB pixel data. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. 
*/ - forward: (imageSource: string) => Promise; + forward: (input: string | PixelData) => Promise; + + /** + * Synchronous worklet function for real-time VisionCamera frame processing. + * Automatically handles native buffer extraction and cleanup. + * + * **Use this for VisionCamera frame processing in worklets.** + * For async processing, use `forward()` instead. + * + * Available after model is loaded (`isReady: true`). + * + * @param frame - VisionCamera Frame object + * @returns PixelData containing the stylized frame as raw RGB pixel data. + */ + runOnFrame: ((frame: Frame) => PixelData) | null; } From 622e8d589b212fafdc7fbe0c51a0be3d8464deab Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 16:54:40 +0100 Subject: [PATCH 31/37] fix: rebase things --- .../app/object_detection/index.tsx | 172 +----------------- .../metaprogramming/TypeConcepts.h | 10 - .../computer_vision/useImageSegmentation.ts | 4 +- .../computer_vision/ObjectDetectionModule.ts | 165 ++++------------- 4 files changed, 47 insertions(+), 304 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 54c0eb18f..6a43dd920 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -1,72 +1,16 @@ import Spinner from '../../components/Spinner'; +import { BottomBar } from '../../components/BottomBar'; import { getImage } from '../../utils'; import { Detection, useObjectDetection, SSDLITE_320_MOBILENET_V3_LARGE, } from 'react-native-executorch'; -import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native'; +import { View, StyleSheet, Image } from 'react-native'; import ImageWithBboxes from '../../components/ImageWithBboxes'; import React, { useContext, useEffect, useState } from 'react'; import { GeneratingContext } from '../../context'; import ScreenWrapper from '../../ScreenWrapper'; -import ColorPalette from 
'../../colors'; -import { Images } from 'react-native-nitro-image'; - -// Helper function to convert BGRA to RGB -function convertBGRAtoRGB( - buffer: ArrayBuffer, - width: number, - height: number -): ArrayBuffer { - const source = new Uint8Array(buffer); - const rgb = new Uint8Array(width * height * 3); - - for (let i = 0; i < width * height; i++) { - // BGRA format: [B, G, R, A] β†’ RGB: [R, G, B] - rgb[i * 3 + 0] = source[i * 4 + 2]; // R - rgb[i * 3 + 1] = source[i * 4 + 1]; // G - rgb[i * 3 + 2] = source[i * 4 + 0]; // B - } - - return rgb.buffer; -} - -// Helper function to convert image URI to raw RGB pixel data -async function imageUriToPixelData( - uri: string, - targetWidth: number, - targetHeight: number -): Promise<{ - data: ArrayBuffer; - width: number; - height: number; - channels: number; -}> { - try { - // Load image and resize to target dimensions - const image = await Images.loadFromFileAsync(uri); - const resized = image.resize(targetWidth, targetHeight); - - // Get pixel data as ArrayBuffer (BGRA format from NitroImage) - const rawPixelData = resized.toRawPixelData(); - const buffer = - rawPixelData instanceof ArrayBuffer ? 
rawPixelData : rawPixelData.buffer; - - // Convert BGRA to RGB as required by the native API - const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight); - - return { - data: rgbBuffer, - width: targetWidth, - height: targetHeight, - channels: 3, // RGB - }; - } catch (error) { - console.error('Error loading image with NitroImage:', error); - throw error; - } -} export default function ObjectDetectionScreen() { const [imageUri, setImageUri] = useState(''); @@ -98,40 +42,10 @@ export default function ObjectDetectionScreen() { const runForward = async () => { if (imageUri) { try { - console.log('Running forward with string URI...'); - const output = await ssdLite.forward(imageUri, 0.5); - console.log('String URI result:', output.length, 'detections'); - setResults(output); - } catch (e) { - console.error('Error in runForward:', e); - } - } - }; - - const runForwardPixels = async () => { - if (imageUri && imageDimensions) { - try { - console.log('Converting image to pixel data...'); - // Use original dimensions - let the model resize internally - const pixelData = await imageUriToPixelData( - imageUri, - imageDimensions.width, - imageDimensions.height - ); - - console.log('Running forward with pixel data...', { - width: pixelData.width, - height: pixelData.height, - channels: pixelData.channels, - dataSize: pixelData.data.byteLength, - }); - - // Run inference using unified forward() API - const output = await ssdLite.forward(pixelData, 0.3); - console.log('Pixel data result:', output.length, 'detections'); + const output = await ssdLite.forward(imageUri); setResults(output); } catch (e) { - console.error('Error in runForwardPixels:', e); + console.error(e); } } }; @@ -167,41 +81,10 @@ export default function ObjectDetectionScreen() { )} - - {/* Custom bottom bar with two buttons */} - - - handleCameraPress(false)}> - πŸ“· Gallery - - - - - - Run (String) - - - - Run (Pixels) - - - + ); } @@ -246,43 +129,4 @@ const styles = StyleSheet.create({ width: 
'100%', height: '100%', }, - bottomContainer: { - width: '100%', - gap: 15, - alignItems: 'center', - padding: 16, - flex: 1, - }, - bottomIconsContainer: { - flexDirection: 'row', - justifyContent: 'center', - width: '100%', - }, - iconText: { - fontSize: 16, - color: ColorPalette.primary, - }, - buttonsRow: { - flexDirection: 'row', - width: '100%', - gap: 10, - }, - button: { - height: 50, - justifyContent: 'center', - alignItems: 'center', - backgroundColor: ColorPalette.primary, - color: '#fff', - borderRadius: 8, - }, - halfButton: { - flex: 1, - }, - buttonDisabled: { - opacity: 0.5, - }, - buttonText: { - color: '#fff', - fontSize: 16, - }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h index 216e2bae3..2d7612f25 100644 --- a/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h +++ b/packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h @@ -26,16 +26,6 @@ concept HasGenerateFromPixels = requires(T t) { { &T::generateFromPixels }; }; -template -concept HasGenerateFromString = requires(T t) { - { &T::generateFromString }; -}; - -template -concept HasGenerateFromPixels = requires(T t) { - { &T::generateFromPixels }; -}; - template concept HasGenerateFromFrame = requires(T t) { { &T::generateFromFrame }; diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts index 55b8d8500..26a804227 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts @@ -9,7 +9,7 @@ import { ModelNameOf, ModelSources, } from '../../types/imageSegmentation'; -import { Frame } from '../../types/common'; +import { Frame, PixelData } from 
'../../types/common'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils'; @@ -92,7 +92,7 @@ export const useImageSegmentation = ({ }, [model.modelName, model.modelSource, preventLoad]); const forward = async >>( - imageSource: string, + imageSource: string | PixelData, classesOfInterest: K[] = [], resizeToInput: boolean = true ) => { diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 762d09987..f056cff62 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -1,143 +1,52 @@ -import { BaseModule } from '../BaseModule'; +import { ResourceFetcher } from '../../utils/ResourceFetcher'; +import { ResourceSource, PixelData } from '../../types/common'; +import { Detection } from '../../types/objectDetection'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; -import { RnExecutorchError } from '../../errors/errorUtils'; -import { Frame, PixelData, ScalarType } from '../../types/common'; +import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; +import { Logger } from '../../common/Logger'; +import { VisionModule } from './VisionModule'; /** - * Base class for computer vision models that support multiple input types. - * - * VisionModule extends BaseModule with: - * - Unified `forward()` API accepting string paths or raw pixel data - * - `runOnFrame` getter for real-time VisionCamera frame processing - * - Shared frame processor creation logic - * - * Subclasses should only implement model-specific loading logic. + * Module for object detection tasks. 
* * @category Typescript API */ -function isPixelData(input: unknown): input is PixelData { - return ( - typeof input === 'object' && - input !== null && - 'dataPtr' in input && - input.dataPtr instanceof Uint8Array && - 'sizes' in input && - Array.isArray(input.sizes) && - input.sizes.length === 3 && - 'scalarType' in input && - input.scalarType === ScalarType.BYTE - ); -} - -export abstract class VisionModule extends BaseModule { +export class ObjectDetectionModule extends VisionModule { /** - * Synchronous worklet function for real-time VisionCamera frame processing. - * - * Only available after the model is loaded. Returns null if not loaded. - * - * **Use this for VisionCamera frame processing in worklets.** - * For async processing, use `forward()` instead. - * - * @example - * ```typescript - * const model = new ClassificationModule(); - * await model.load({ modelSource: MODEL }); + * Loads the model, where `modelSource` is a string that specifies the location of the model binary. + * To track the download progress, supply a callback function `onDownloadProgressCallback`. * - * // Use the functional form of setState to store the worklet β€” passing it - * // directly would cause React to invoke it immediately as an updater fn. - * const [runOnFrame, setRunOnFrame] = useState(null); - * setRunOnFrame(() => model.runOnFrame); - * - * const frameOutput = useFrameOutput({ - * onFrame(frame) { - * 'worklet'; - * if (!runOnFrame) return; - * const result = runOnFrame(frame); - * frame.dispose(); - * } - * }); - * ``` + * @param model - Object containing `modelSource`. + * @param onDownloadProgressCallback - Optional callback to monitor download progress. 
*/ - get runOnFrame(): ((frame: Frame, ...args: any[]) => TOutput) | null { - if (!this.nativeModule?.generateFromFrame) { - return null; - } - - // Extract pure JSI function reference (runs on JS thread) - const nativeGenerateFromFrame = this.nativeModule.generateFromFrame; - - // Return worklet that captures ONLY the JSI function - return (frame: any, ...args: any[]): TOutput => { - 'worklet'; + async load( + model: { modelSource: ResourceSource }, + onDownloadProgressCallback: (progress: number) => void = () => {} + ): Promise { + try { + const paths = await ResourceFetcher.fetch( + onDownloadProgressCallback, + model.modelSource + ); - let nativeBuffer: any = null; - try { - nativeBuffer = frame.getNativeBuffer(); - const frameData = { - nativeBuffer: nativeBuffer.pointer, - }; - return nativeGenerateFromFrame(frameData, ...args); - } finally { - if (nativeBuffer?.release) { - nativeBuffer.release(); - } + if (!paths?.[0]) { + throw new RnExecutorchError( + RnExecutorchErrorCode.DownloadInterrupted, + 'The download has been interrupted. As a result, not every file was downloaded. Please retry the download.' + ); } - }; - } - /** - * Executes the model's forward pass with automatic input type detection. - * - * Supports two input types: - * 1. **String path/URI**: File path, URL, or Base64-encoded string - * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) - * - * **Note**: For VisionCamera frame processing, use `runOnFrame` instead. - * This method is async and cannot be called in worklet context. - * - * @param input - Image source (string path or PixelData object) - * @param args - Additional model-specific arguments - * @returns A Promise that resolves to the model output. 
- * - * @example - * ```typescript - * // String path (async) - * const result1 = await model.forward('file:///path/to/image.jpg'); - * - * // Pixel data (async) - * const result2 = await model.forward({ - * dataPtr: new Uint8Array(pixelBuffer), - * sizes: [480, 640, 3], - * scalarType: ScalarType.BYTE - * }); - * - * // For VisionCamera frames, use runOnFrame in worklet: - * const frameOutput = useFrameOutput({ - * onFrame(frame) { - * 'worklet'; - * if (!model.runOnFrame) return; - * const result = model.runOnFrame(frame); - * } - * }); - * ``` - */ - async forward(input: string | PixelData, ...args: any[]): Promise { - if (this.nativeModule == null) - throw new RnExecutorchError( - RnExecutorchErrorCode.ModuleNotLoaded, - 'The model is currently not loaded. Please load the model before calling forward().' - ); - - // Type detection and routing - if (typeof input === 'string') { - return await this.nativeModule.generateFromString(input, ...args); - } else if (isPixelData(input)) { - return await this.nativeModule.generateFromPixels(input, ...args); - } else { - throw new RnExecutorchError( - RnExecutorchErrorCode.InvalidArgument, - 'Invalid input: expected string path or PixelData object. For VisionCamera frames, use runOnFrame instead.' 
- ); + this.nativeModule = global.loadObjectDetection(paths[0]); + } catch (error) { + Logger.error('Load failed:', error); + throw parseUnknownError(error); } } + + async forward( + input: string | PixelData, + detectionThreshold: number = 0.5 + ): Promise { + return super.forward(input, detectionThreshold); + } } From dc5e65eb60ae41a407211b86d9dc2fcb96b55e2e Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Wed, 25 Feb 2026 19:22:16 +0100 Subject: [PATCH 32/37] chore: remove comment --- apps/computer-vision/app/vision_camera_live/index.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx index 4c7b425b1..8c5d71d33 100644 --- a/apps/computer-vision/app/vision_camera_live/index.tsx +++ b/apps/computer-vision/app/vision_camera_live/index.tsx @@ -71,8 +71,6 @@ const MODELS: { id: ModelId; label: string }[] = [ { id: 'ocr', label: 'OCR' }, ]; -// ─── Segmentation colors ───────────────────────────────────────────────────── - const CLASS_COLORS: number[][] = [ [0, 0, 0, 0], [51, 255, 87, 180], From 2c8dd67feaee982ca7aa3477e9cf091c62dbcb4f Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 11:41:13 +0100 Subject: [PATCH 33/37] feat: add dedicated vision camera screen showcasing classification/segmentation/object detection --- apps/computer-vision/app/_layout.tsx | 57 +- .../app/classification_live/index.tsx | 255 ------ .../app/image_segmentation_live/index.tsx | 292 ------- apps/computer-vision/app/index.tsx | 12 +- .../app/object_detection_live/index.tsx | 300 ------- apps/computer-vision/app/ocr_live/index.tsx | 329 -------- .../app/style_transfer_live/index.tsx | 274 ------ .../app/vision_camera/index.tsx | 665 +++++++++++++++ .../app/vision_camera_live/index.tsx | 796 ------------------ 9 files changed, 680 insertions(+), 2300 deletions(-) delete mode 100644 apps/computer-vision/app/classification_live/index.tsx delete mode 
100644 apps/computer-vision/app/image_segmentation_live/index.tsx delete mode 100644 apps/computer-vision/app/object_detection_live/index.tsx delete mode 100644 apps/computer-vision/app/ocr_live/index.tsx delete mode 100644 apps/computer-vision/app/style_transfer_live/index.tsx create mode 100644 apps/computer-vision/app/vision_camera/index.tsx delete mode 100644 apps/computer-vision/app/vision_camera_live/index.tsx diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index b614b54bf..eafbc70e6 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -59,6 +59,15 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} > + - - - - - - { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [topLabel, setTopLabel] = useState(''); - const [topScore, setTopScore] = useState(0); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateStats = useCallback( - (result: { label: string; score: number }) => { - setTopLabel(result.label); - setTopScore(result.score); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, - [] - ); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame); - if (result) { - // find the top-1 entry - let bestLabel = ''; - let bestScore = -1; - const entries = Object.entries(result); - for (let i = 0; i < entries.length; i++) { - const [label, score] = entries[i]; - if ((score as number) > bestScore) { - bestScore = score as number; - bestLabel = label; - } - } - scheduleOnRN(updateStats, { label: bestLabel, score: bestScore }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - - - - - {topLabel || 'β€”'} - - - {topLabel ? 
(topScore * 100).toFixed(1) + '%' : ''} - - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - paddingHorizontal: 16, - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - maxWidth: '100%', - }, - labelContainer: { - flex: 1, - alignItems: 'flex-start', - }, - labelText: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - scoreText: { - color: 'rgba(255,255,255,0.7)', - fontSize: 13, - fontWeight: '500', - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/image_segmentation_live/index.tsx b/apps/computer-vision/app/image_segmentation_live/index.tsx deleted file mode 100644 index f665c63c5..000000000 --- a/apps/computer-vision/app/image_segmentation_live/index.tsx +++ /dev/null @@ -1,292 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - 
useWindowDimensions, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - DEEPLAB_V3_RESNET50, - useImageSegmentation, -} from 'react-native-executorch'; -import { - Canvas, - Image as SkiaImage, - Skia, - AlphaType, - ColorType, - SkImage, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -// RGBA colors for each DeepLab V3 class (alpha = 180 for semi-transparency) -const CLASS_COLORS: number[][] = [ - [0, 0, 0, 0], // 0 background β€” transparent - [51, 255, 87, 180], // 1 aeroplane - [51, 87, 255, 180], // 2 bicycle - [255, 51, 246, 180], // 3 bird - [51, 255, 246, 180], // 4 boat - [243, 255, 51, 180], // 5 bottle - [141, 51, 255, 180], // 6 bus - [255, 131, 51, 180], // 7 car - [51, 255, 131, 180], // 8 cat - [131, 51, 255, 180], // 9 chair - [255, 255, 51, 180], // 10 cow - [51, 255, 255, 180], // 11 diningtable - [255, 51, 143, 180], // 12 dog - [127, 51, 255, 180], // 13 horse - [51, 255, 175, 180], // 14 motorbike - [255, 175, 51, 180], // 15 person - [179, 255, 51, 180], // 16 pottedplant - [255, 87, 51, 180], // 17 sheep - [255, 51, 162, 180], // 18 sofa - [51, 162, 255, 180], // 19 train - [162, 51, 255, 180], // 20 tvmonitor -]; - -export default function ImageSegmentationLiveScreen() { - const insets = useSafeAreaInsets(); - const { width: screenWidth, height: screenHeight } = useWindowDimensions(); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = - useImageSegmentation({ model: DEEPLAB_V3_RESNET50 }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, 
[isGenerating, setGlobalGenerating]); - - const [maskImage, setMaskImage] = useState(null); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateMask = useCallback((img: SkImage) => { - setMaskImage(img); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame, [], false); - if (result?.ARGMAX) { - const argmax: Int32Array = result.ARGMAX; - // Model output is always square (modelImageSize Γ— modelImageSize). - // Derive width/height from argmax length (sqrt for square output). - const side = Math.round(Math.sqrt(argmax.length)); - const width = side; - const height = side; - - // Build RGBA pixel buffer on the worklet thread to avoid transferring - // the large Int32Array across the workletβ†’RN boundary via scheduleOnRN. - const pixels = new Uint8Array(width * height * 4); - for (let i = 0; i < argmax.length; i++) { - const color = CLASS_COLORS[argmax[i]] ?? 
[0, 0, 0, 0]; - pixels[i * 4] = color[0]!; - pixels[i * 4 + 1] = color[1]!; - pixels[i * 4 + 2] = color[2]!; - pixels[i * 4 + 3] = color[3]!; - } - - const skData = Skia.Data.fromBytes(pixels); - const img = Skia.Image.MakeImage( - { - width, - height, - alphaType: AlphaType.Unpremul, - colorType: ColorType.RGBA_8888, - }, - skData, - width * 4 - ); - if (img) { - scheduleOnRN(updateMask, img); - } - } - } catch (e) { - console.log('frame error:', String(e)); - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - {maskImage && ( - - - - )} - - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, -}); diff --git 
a/apps/computer-vision/app/index.tsx b/apps/computer-vision/app/index.tsx index bf391aeea..ed8712a54 100644 --- a/apps/computer-vision/app/index.tsx +++ b/apps/computer-vision/app/index.tsx @@ -11,6 +11,12 @@ export default function Home() { Select a demo model + router.navigate('vision_camera/')} + > + Vision Camera + router.navigate('classification/')} @@ -29,12 +35,6 @@ export default function Home() { > Object Detection - router.navigate('object_detection_live/')} - > - Object Detection Live - router.navigate('ocr/')} diff --git a/apps/computer-vision/app/object_detection_live/index.tsx b/apps/computer-vision/app/object_detection_live/index.tsx deleted file mode 100644 index d883fe8b9..000000000 --- a/apps/computer-vision/app/object_detection_live/index.tsx +++ /dev/null @@ -1,300 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - Detection, - SSDLITE_320_MOBILENET_V3_LARGE, - useObjectDetection, -} from 'react-native-executorch'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -export default function ObjectDetectionLiveScreen() { - const insets = useSafeAreaInsets(); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - - const model = useObjectDetection({ model: SSDLITE_320_MOBILENET_V3_LARGE }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(model.isGenerating); - }, [model.isGenerating, setGlobalGenerating]); - - const [detections, 
setDetections] = useState([]); - const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateDetections = useCallback( - (payload: { - results: Detection[]; - imageWidth: number; - imageHeight: number; - }) => { - setDetections(payload.results); - setImageSize({ width: payload.imageWidth, height: payload.imageHeight }); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, - [] - ); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!model.runOnFrame) { - frame.dispose(); - return; - } - // After 90Β° CW rotation, the image fed to the model has swapped dims. - const imageWidth = - frame.width > frame.height ? frame.height : frame.width; - const imageHeight = - frame.width > frame.height ? 
frame.width : frame.height; - try { - const result = model.runOnFrame(frame, 0.5); - if (result) { - scheduleOnRN(updateDetections, { - results: result, - imageWidth, - imageHeight, - }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!model.isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - - - {/* Bounding box overlay β€” measured to match the exact camera preview area */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - {(() => { - // Cover-fit: camera preview scales to fill the canvas, cropping the - // excess. Compute the same transform so bbox pixel coords map correctly. - const scale = Math.max( - canvasSize.width / imageSize.width, - canvasSize.height / imageSize.height - ); - const offsetX = (canvasSize.width - imageSize.width * scale) / 2; - const offsetY = (canvasSize.height - imageSize.height * scale) / 2; - return detections.map((det, i) => { - const left = det.bbox.x1 * scale + offsetX; - const top = det.bbox.y1 * scale + offsetY; - const width = (det.bbox.x2 - det.bbox.x1) * scale; - const height = (det.bbox.y2 - det.bbox.y1) * scale; - return ( - - - - {det.label} {(det.score * 100).toFixed(0)}% - - - - ); - }); - })()} - - - - - - {detections.length} - objects - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - 
buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bbox: { - position: 'absolute', - borderWidth: 2, - borderColor: ColorPalette.primary, - borderRadius: 4, - }, - bboxLabel: { - position: 'absolute', - top: -22, - left: -2, - backgroundColor: ColorPalette.primary, - paddingHorizontal: 6, - paddingVertical: 2, - borderRadius: 4, - }, - bboxLabelText: { - color: 'white', - fontSize: 11, - fontWeight: '600', - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/ocr_live/index.tsx b/apps/computer-vision/app/ocr_live/index.tsx deleted file mode 100644 index a0c93899f..000000000 --- a/apps/computer-vision/app/ocr_live/index.tsx +++ /dev/null @@ -1,329 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { OCR_ENGLISH, useOCR, OCRDetection } from 'react-native-executorch'; -import { - Canvas, - Path, - Skia, - Text as SkiaText, - 
matchFont, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -interface FrameDetections { - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; -} - -export default function OCRLiveScreen() { - const insets = useSafeAreaInsets(); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = useOCR({ - model: OCR_ENGLISH, - }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [frameDetections, setFrameDetections] = useState({ - detections: [], - frameWidth: 1, - frameHeight: 1, - }); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? 
devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateDetections = useCallback((result: FrameDetections) => { - setFrameDetections(result); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - dropFramesWhileBusy: true, - pixelFormat: 'rgb', - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - const frameWidth = frame.width; - const frameHeight = frame.height; - try { - const result = runOnFrame(frame); - if (result) { - scheduleOnRN(updateDetections, { - detections: result, - frameWidth, - frameHeight, - }); - } - } catch { - // ignore frame errors - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - const { detections, frameWidth, frameHeight } = frameDetections; - - // OCR runs on the raw landscape frame (no rotation applied in native). - // The camera preview displays it as portrait (90Β° CW rotation applied by iOS). - // After rotation the image dimensions become (frameHeight Γ— frameWidth). - // Cover-fit scale uses post-rotation dims to match what the preview shows. - const isLandscape = frameWidth > frameHeight; - const imageW = isLandscape ? frameHeight : frameWidth; - const imageH = isLandscape ? 
frameWidth : frameHeight; - const scale = Math.max(canvasSize.width / imageW, canvasSize.height / imageH); - const offsetX = (canvasSize.width - imageW * scale) / 2; - const offsetY = (canvasSize.height - imageH * scale) / 2; - - // Map a raw landscape point to screen coords accounting for rotation + cover-fit. - function toScreenX(px: number, py: number) { - // After 90Β° CW: rotated_x = frameHeight - py, rotated_y = px - const rx = isLandscape ? frameHeight - py : px; - return rx * scale + offsetX; - } - function toScreenY(px: number, py: number) { - const ry = isLandscape ? px : py; - return ry * scale + offsetY; - } - - return ( - - - - - - {/* Measure the overlay area, then draw polygons inside a Canvas */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - - {detections.map((det, i) => { - if (!det.bbox || det.bbox.length < 2) return null; - - const path = Skia.Path.Make(); - path.moveTo( - toScreenX(det.bbox[0]!.x, det.bbox[0]!.y), - toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - ); - for (let j = 1; j < det.bbox.length; j++) { - path.lineTo( - toScreenX(det.bbox[j]!.x, det.bbox[j]!.y), - toScreenY(det.bbox[j]!.x, det.bbox[j]!.y) - ); - } - path.close(); - - const labelX = toScreenX(det.bbox[0]!.x, det.bbox[0]!.y); - const labelY = Math.max( - 0, - toScreenY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 - ); - - return ( - - - - {font && ( - - )} - - ); - })} - - - - - - - {detections.length} - regions - - - - {fps} - fps - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 
0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/style_transfer_live/index.tsx b/apps/computer-vision/app/style_transfer_live/index.tsx deleted file mode 100644 index 57889313f..000000000 --- a/apps/computer-vision/app/style_transfer_live/index.tsx +++ /dev/null @@ -1,274 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - useWindowDimensions, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; - -import { - Camera, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { scheduleOnRN } from 'react-native-worklets'; -import { - STYLE_TRANSFER_RAIN_PRINCESS, - useStyleTransfer, -} from 'react-native-executorch'; -import { - Canvas, - Image as SkiaImage, - Skia, - AlphaType, - ColorType, - SkImage, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -export default function StyleTransferLiveScreen() { - const insets = useSafeAreaInsets(); - const { width: screenWidth, height: screenHeight } = 
useWindowDimensions(); - - const { isReady, isGenerating, downloadProgress, runOnFrame } = - useStyleTransfer({ model: STYLE_TRANSFER_RAIN_PRINCESS }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - useEffect(() => { - setGlobalGenerating(isGenerating); - }, [isGenerating, setGlobalGenerating]); - - const [styledImage, setStyledImage] = useState(null); - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - const updateImage = useCallback((img: SkImage) => { - setStyledImage((prev) => { - prev?.dispose(); - return img; - }); - const now = Date.now(); - const timeDiff = now - lastFrameTimeRef.current; - if (timeDiff > 0) { - setFps(Math.round(1000 / timeDiff)); - } - lastFrameTimeRef.current = now; - }, []); - - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame(frame) { - 'worklet'; - if (!runOnFrame) { - frame.dispose(); - return; - } - try { - const result = runOnFrame(frame); - if (result?.dataPtr) { - const { dataPtr, sizes } = result; - const height = sizes[0]; - const width = sizes[1]; - // Build Skia image on the worklet thread β€” avoids transferring the - // large pixel buffer across the workletβ†’RN boundary via scheduleOnRN. 
- const skData = Skia.Data.fromBytes(dataPtr); - const img = Skia.Image.MakeImage( - { - width, - height, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - skData, - width * 4 - ); - if (img) { - scheduleOnRN(updateImage, img); - } - } - } catch (e) { - console.log('frame error:', String(e)); - } finally { - frame.dispose(); - } - }, - }); - - if (!isReady) { - return ( - - ); - } - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - return ( - - - - {/* Camera always runs to keep frame processing active */} - - - {/* Styled output overlays the camera feed once available */} - {styledImage && ( - - - - )} - - - - - {fps} - fps - - - - candy - style - - - - - ); -} - -const styles = StyleSheet.create({ - container: { - flex: 1, - backgroundColor: 'black', - }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { - color: 'white', - fontSize: 18, - }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { - color: 'white', - fontSize: 15, - fontWeight: '600', - letterSpacing: 0.3, - }, - bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0, 0, 0, 0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - statItem: { - alignItems: 'center', - }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - styleLabel: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - 
textTransform: 'uppercase', - letterSpacing: 0.8, - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, -}); diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx new file mode 100644 index 000000000..625018849 --- /dev/null +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -0,0 +1,665 @@ +import React, { + useCallback, + useContext, + useEffect, + useMemo, + useRef, + useState, +} from 'react'; +import { + ScrollView, + StatusBar, + StyleSheet, + Text, + TouchableOpacity, + View, +} from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { + Camera, + Frame, + getCameraFormat, + Templates, + useCameraDevices, + useCameraPermission, + useFrameOutput, +} from 'react-native-vision-camera'; +import { createSynchronizable, scheduleOnRN } from 'react-native-worklets'; +import { + DEEPLAB_V3_RESNET50, + Detection, + EFFICIENTNET_V2_S, + SSDLITE_320_MOBILENET_V3_LARGE, + useClassification, + useImageSegmentation, + useObjectDetection, +} from 'react-native-executorch'; +import { + AlphaType, + Canvas, + ColorType, + Image as SkiaImage, + Skia, + SkImage, +} from '@shopify/react-native-skia'; +import { GeneratingContext } from '../../context'; +import Spinner from '../../components/Spinner'; +import ColorPalette from '../../colors'; + +type TaskId = 'classification' | 'objectDetection' | 'segmentation'; +type ModelId = 'classification' | 'objectDetection' | 'segmentation'; + +type TaskVariant = { id: ModelId; label: string }; +type Task = { id: TaskId; label: string; variants: TaskVariant[] }; + +const TASKS: Task[] = [ + { + id: 'classification', + label: 'Classify', + variants: [{ id: 'classification', label: 'EfficientNet V2 S' }], + }, + { + id: 'segmentation', + label: 'Segment', + variants: [{ id: 'segmentation', label: 'DeepLab V3' }], + }, + { + id: 'objectDetection', + label: 'Detect', + variants: [{ id: 
'objectDetection', label: 'SSDLite MobileNet' }], + }, +]; + +const CLASS_COLORS: number[][] = [ + [0, 0, 0, 0], + [51, 255, 87, 180], + [51, 87, 255, 180], + [255, 51, 246, 180], + [51, 255, 246, 180], + [243, 255, 51, 180], + [141, 51, 255, 180], + [255, 131, 51, 180], + [51, 255, 131, 180], + [131, 51, 255, 180], + [255, 255, 51, 180], + [51, 255, 255, 180], + [255, 51, 143, 180], + [127, 51, 255, 180], + [51, 255, 175, 180], + [255, 175, 51, 180], + [179, 255, 51, 180], + [255, 87, 51, 180], + [255, 51, 162, 180], + [51, 162, 255, 180], + [162, 51, 255, 180], +]; + +function hashLabel(label: string): number { + let hash = 5381; + for (let i = 0; i < label.length; i++) { + hash = (hash + hash * 32 + label.charCodeAt(i)) % 1000003; + } + return 1 + (Math.abs(hash) % (CLASS_COLORS.length - 1)); +} + +function labelColor(label: string): string { + const color = CLASS_COLORS[hashLabel(label)]!; + return `rgba(${color[0]},${color[1]},${color[2]},1)`; +} + +function labelColorBg(label: string): string { + const color = CLASS_COLORS[hashLabel(label)]!; + return `rgba(${color[0]},${color[1]},${color[2]},0.75)`; +} + +const frameKillSwitch = createSynchronizable(false); + +export default function VisionCameraScreen() { + const insets = useSafeAreaInsets(); + const [activeTask, setActiveTask] = useState('classification'); + const [activeModel, setActiveModel] = useState('classification'); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const { setGlobalGenerating } = useContext(GeneratingContext); + + const classification = useClassification({ + model: EFFICIENTNET_V2_S, + preventLoad: activeModel !== 'classification', + }); + const objectDetection = useObjectDetection({ + model: SSDLITE_320_MOBILENET_V3_LARGE, + preventLoad: activeModel !== 'objectDetection', + }); + const segmentation = useImageSegmentation({ + model: DEEPLAB_V3_RESNET50, + preventLoad: activeModel !== 'segmentation', + }); + + const activeIsGenerating = { + classification: 
classification.isGenerating, + objectDetection: objectDetection.isGenerating, + segmentation: segmentation.isGenerating, + }[activeModel]; + + useEffect(() => { + setGlobalGenerating(activeIsGenerating); + }, [activeIsGenerating, setGlobalGenerating]); + + const [fps, setFps] = useState(0); + const [frameMs, setFrameMs] = useState(0); + const lastFrameTimeRef = useRef(Date.now()); + const cameraPermission = useCameraPermission(); + const devices = useCameraDevices(); + const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const format = useMemo(() => { + if (device == null) return undefined; + try { + return getCameraFormat(device, Templates.FrameProcessing); + } catch { + return undefined; + } + }, [device]); + + const [classResult, setClassResult] = useState({ label: '', score: 0 }); + const [detections, setDetections] = useState([]); + const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); + const [maskImage, setMaskImage] = useState(null); + + const updateClass = useCallback((r: { label: string; score: number }) => { + setClassResult(r); + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateFps = useCallback(() => { + const now = Date.now(); + const diff = now - lastFrameTimeRef.current; + if (diff > 0) { + setFps(Math.round(1000 / diff)); + setFrameMs(diff); + } + lastFrameTimeRef.current = now; + }, []); + + const updateDetections = useCallback( + (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { + setDetections(p.results); + setImageSize({ width: p.imageWidth, height: p.imageHeight }); + updateFps(); + }, + [updateFps] + ); + + const updateMask = useCallback( + (img: SkImage) => { + setMaskImage((prev) => { + prev?.dispose(); + return img; + }); + updateFps(); + }, + [updateFps] + ); + + const classRof = classification.runOnFrame; + const detRof 
= objectDetection.runOnFrame; + const segRof = segmentation.runOnFrame; + + useEffect(() => { + frameKillSwitch.setBlocking(true); + setMaskImage((prev) => { + prev?.dispose(); + return null; + }); + const id = setTimeout(() => { + frameKillSwitch.setBlocking(false); + }, 300); + return () => clearTimeout(id); + }, [activeModel]); + + const frameOutput = useFrameOutput({ + pixelFormat: 'rgb', + dropFramesWhileBusy: true, + onFrame: useCallback( + (frame: Frame) => { + 'worklet'; + + if (frameKillSwitch.getDirty()) { + frame.dispose(); + return; + } + + try { + if (activeModel === 'classification') { + if (!classRof) return; + const result = classRof(frame); + if (result) { + let bestLabel = ''; + let bestScore = -1; + const entries = Object.entries(result); + for (let i = 0; i < entries.length; i++) { + const [label, score] = entries[i]!; + if ((score as number) > bestScore) { + bestScore = score as number; + bestLabel = label; + } + } + scheduleOnRN(updateClass, { label: bestLabel, score: bestScore }); + } + } else if (activeModel === 'objectDetection') { + if (!detRof) return; + const iw = frame.width > frame.height ? frame.height : frame.width; + const ih = frame.width > frame.height ? frame.width : frame.height; + const result = detRof(frame, 0.5); + if (result) { + scheduleOnRN(updateDetections, { + results: result, + imageWidth: iw, + imageHeight: ih, + }); + } + } else if (activeModel === 'segmentation') { + if (!segRof) return; + const result = segRof(frame, [], false); + if (result?.ARGMAX) { + const argmax: Int32Array = result.ARGMAX; + const side = Math.round(Math.sqrt(argmax.length)); + const pixels = new Uint8Array(side * side * 4); + for (let i = 0; i < argmax.length; i++) { + const color = CLASS_COLORS[argmax[i]!] ?? 
[0, 0, 0, 0]; + pixels[i * 4] = color[0]!; + pixels[i * 4 + 1] = color[1]!; + pixels[i * 4 + 2] = color[2]!; + pixels[i * 4 + 3] = color[3]!; + } + const skData = Skia.Data.fromBytes(pixels); + const img = Skia.Image.MakeImage( + { + width: side, + height: side, + alphaType: AlphaType.Unpremul, + colorType: ColorType.RGBA_8888, + }, + skData, + side * 4 + ); + if (img) scheduleOnRN(updateMask, img); + } + } + } catch { + // ignore + } finally { + frame.dispose(); + } + }, + [ + activeModel, + classRof, + detRof, + segRof, + updateClass, + updateDetections, + updateMask, + ] + ), + }); + + const activeIsReady = { + classification: classification.isReady, + objectDetection: objectDetection.isReady, + segmentation: segmentation.isReady, + }[activeModel]; + + const activeDownloadProgress = { + classification: classification.downloadProgress, + objectDetection: objectDetection.downloadProgress, + segmentation: segmentation.downloadProgress, + }[activeModel]; + + if (!cameraPermission.hasPermission) { + return ( + + Camera access needed + cameraPermission.requestPermission()} + style={styles.button} + > + Grant Permission + + + ); + } + + if (device == null) { + return ( + + No camera device found + + ); + } + + function coverFit(imgW: number, imgH: number) { + const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); + return { + scale, + offsetX: (canvasSize.width - imgW * scale) / 2, + offsetY: (canvasSize.height - imgH * scale) / 2, + }; + } + + const { + scale: detScale, + offsetX: detOX, + offsetY: detOY, + } = coverFit(imageSize.width, imageSize.height); + + const activeTaskInfo = TASKS.find((t) => t.id === activeTask)!; + const activeVariantLabel = + activeTaskInfo.variants.find((v) => v.id === activeModel)?.label ?? 
+ activeTaskInfo.variants[0]!.label; + + return ( + + + + + + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + {activeModel === 'segmentation' && maskImage && ( + + + + )} + + {activeModel === 'objectDetection' && ( + <> + {detections.map((det, i) => { + const left = det.bbox.x1 * detScale + detOX; + const top = det.bbox.y1 * detScale + detOY; + const w = (det.bbox.x2 - det.bbox.x1) * detScale; + const h = (det.bbox.y2 - det.bbox.y1) * detScale; + return ( + + + + {det.label} {(det.score * 100).toFixed(1)} + + + + ); + })} + + )} + + + {activeModel === 'classification' && classResult.label ? ( + + {classResult.label} + + {(classResult.score * 100).toFixed(1)}% + + + ) : null} + + {!activeIsReady && ( + + + + )} + + + + {activeVariantLabel} + + {fps} FPS – {frameMs.toFixed(0)} ms + + + + + {TASKS.map((t) => ( + { + setActiveTask(t.id); + setActiveModel(t.variants[0]!.id); + }} + > + + {t.label} + + + ))} + + + + {activeTaskInfo.variants.map((v) => ( + setActiveModel(v.id)} + > + + {v.label} + + + ))} + + + + ); +} + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: 'black' }, + centered: { + flex: 1, + backgroundColor: 'black', + justifyContent: 'center', + alignItems: 'center', + gap: 16, + }, + message: { color: 'white', fontSize: 18 }, + button: { + paddingHorizontal: 24, + paddingVertical: 12, + backgroundColor: ColorPalette.primary, + borderRadius: 24, + }, + buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, + loadingOverlay: { + ...StyleSheet.absoluteFillObject, + backgroundColor: 'rgba(0,0,0,0.6)', + justifyContent: 'center', + alignItems: 'center', + }, + + topOverlay: { + position: 'absolute', + top: 0, + left: 0, + right: 0, + alignItems: 'center', + gap: 8, + }, + titleRow: { + alignItems: 'center', + paddingHorizontal: 16, + }, + modelTitle: { + color: 'white', + fontSize: 22, + fontWeight: '700', + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: 
{ width: 0, height: 1 }, + textShadowRadius: 4, + }, + fpsText: { + color: 'rgba(255,255,255,0.85)', + fontSize: 14, + fontWeight: '500', + marginTop: 2, + textShadowColor: 'rgba(0,0,0,0.7)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 4, + }, + + tabsContent: { + paddingHorizontal: 12, + gap: 6, + }, + tab: { + paddingHorizontal: 18, + paddingVertical: 7, + borderRadius: 20, + backgroundColor: 'rgba(0,0,0,0.45)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.25)', + }, + tabActive: { + backgroundColor: 'rgba(255,255,255,0.2)', + borderColor: 'white', + }, + tabText: { + color: 'rgba(255,255,255,0.7)', + fontSize: 14, + fontWeight: '600', + }, + tabTextActive: { color: 'white' }, + + chipsContent: { + paddingHorizontal: 12, + gap: 6, + }, + variantChip: { + paddingHorizontal: 14, + paddingVertical: 5, + borderRadius: 16, + backgroundColor: 'rgba(0,0,0,0.35)', + borderWidth: 1, + borderColor: 'rgba(255,255,255,0.15)', + }, + variantChipActive: { + backgroundColor: ColorPalette.primary, + borderColor: ColorPalette.primary, + }, + variantChipText: { + color: 'rgba(255,255,255,0.6)', + fontSize: 12, + fontWeight: '500', + }, + variantChipTextActive: { color: 'white' }, + + bbox: { + position: 'absolute', + borderWidth: 2, + borderColor: 'cyan', + borderRadius: 4, + }, + bboxLabel: { + position: 'absolute', + top: -22, + left: -2, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, + + classResultOverlay: { + ...StyleSheet.absoluteFillObject, + justifyContent: 'center', + alignItems: 'center', + }, + classResultLabel: { + color: 'white', + fontSize: 28, + fontWeight: '700', + textAlign: 'center', + textShadowColor: 'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + paddingHorizontal: 24, + }, + classResultScore: { + color: 'rgba(255,255,255,0.75)', + fontSize: 18, + fontWeight: '500', + marginTop: 4, + textShadowColor: 
'rgba(0,0,0,0.8)', + textShadowOffset: { width: 0, height: 1 }, + textShadowRadius: 6, + }, +}); diff --git a/apps/computer-vision/app/vision_camera_live/index.tsx b/apps/computer-vision/app/vision_camera_live/index.tsx deleted file mode 100644 index 8c5d71d33..000000000 --- a/apps/computer-vision/app/vision_camera_live/index.tsx +++ /dev/null @@ -1,796 +0,0 @@ -import React, { - useCallback, - useContext, - useEffect, - useMemo, - useRef, - useState, -} from 'react'; -import { - ScrollView, - StatusBar, - StyleSheet, - Text, - TouchableOpacity, - View, -} from 'react-native'; -import { useSafeAreaInsets } from 'react-native-safe-area-context'; -import { - Camera, - Frame, - getCameraFormat, - Templates, - useCameraDevices, - useCameraPermission, - useFrameOutput, -} from 'react-native-vision-camera'; -import { createSynchronizable, runOnJS } from 'react-native-worklets'; -import { - DEEPLAB_V3_RESNET50, - Detection, - EFFICIENTNET_V2_S, - OCRDetection, - OCR_ENGLISH, - SSDLITE_320_MOBILENET_V3_LARGE, - STYLE_TRANSFER_RAIN_PRINCESS, - useClassification, - useImageSegmentation, - useObjectDetection, - useOCR, - useStyleTransfer, -} from 'react-native-executorch'; -import { - AlphaType, - Canvas, - ColorType, - Image as SkiaImage, - matchFont, - Path, - Skia, - SkImage, - Text as SkiaText, -} from '@shopify/react-native-skia'; -import { GeneratingContext } from '../../context'; -import Spinner from '../../components/Spinner'; -import ColorPalette from '../../colors'; - -// ─── Model IDs ─────────────────────────────────────────────────────────────── - -type ModelId = - | 'classification' - | 'object_detection' - | 'segmentation' - | 'style_transfer' - | 'ocr'; - -const MODELS: { id: ModelId; label: string }[] = [ - { id: 'classification', label: 'Classification' }, - { id: 'object_detection', label: 'Object Detection' }, - { id: 'segmentation', label: 'Segmentation' }, - { id: 'style_transfer', label: 'Style Transfer' }, - { id: 'ocr', label: 'OCR' }, -]; - -const 
CLASS_COLORS: number[][] = [ - [0, 0, 0, 0], - [51, 255, 87, 180], - [51, 87, 255, 180], - [255, 51, 246, 180], - [51, 255, 246, 180], - [243, 255, 51, 180], - [141, 51, 255, 180], - [255, 131, 51, 180], - [51, 255, 131, 180], - [131, 51, 255, 180], - [255, 255, 51, 180], - [51, 255, 255, 180], - [255, 51, 143, 180], - [127, 51, 255, 180], - [51, 255, 175, 180], - [255, 175, 51, 180], - [179, 255, 51, 180], - [255, 87, 51, 180], - [255, 51, 162, 180], - [51, 162, 255, 180], - [162, 51, 255, 180], -]; - -// ─── Kill switch β€” synchronizable boolean shared between JS and worklet thread. -// setBlocking(true) immediately stops the worklet from dispatching new work -// (both in onFrame and inside the async callback) before the old model tears down. -const frameKillSwitch = createSynchronizable(false); - -// ─── Screen ────────────────────────────────────────────────────────────────── - -export default function VisionCameraLiveScreen() { - const insets = useSafeAreaInsets(); - const [activeModel, setActiveModel] = useState('classification'); - const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); - const { setGlobalGenerating } = useContext(GeneratingContext); - - // ── Models (only the active model loads; others are prevented) ── - const classification = useClassification({ - model: EFFICIENTNET_V2_S, - preventLoad: activeModel !== 'classification', - }); - const objectDetection = useObjectDetection({ - model: SSDLITE_320_MOBILENET_V3_LARGE, - preventLoad: activeModel !== 'object_detection', - }); - const segmentation = useImageSegmentation({ - model: DEEPLAB_V3_RESNET50, - preventLoad: activeModel !== 'segmentation', - }); - const styleTransfer = useStyleTransfer({ - model: STYLE_TRANSFER_RAIN_PRINCESS, - preventLoad: activeModel !== 'style_transfer', - }); - const ocr = useOCR({ - model: OCR_ENGLISH, - preventLoad: activeModel !== 'ocr', - }); - - const activeIsGenerating = { - classification: classification.isGenerating, - object_detection: 
objectDetection.isGenerating, - segmentation: segmentation.isGenerating, - style_transfer: styleTransfer.isGenerating, - ocr: ocr.isGenerating, - }[activeModel]; - - useEffect(() => { - setGlobalGenerating(activeIsGenerating); - }, [activeIsGenerating, setGlobalGenerating]); - - // ── Camera ── - const [fps, setFps] = useState(0); - const lastFrameTimeRef = useRef(Date.now()); - const cameraPermission = useCameraPermission(); - const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; - const format = useMemo(() => { - if (device == null) return undefined; - try { - return getCameraFormat(device, Templates.FrameProcessing); - } catch { - return undefined; - } - }, [device]); - - // ── Per-model result state ── - const [classResult, setClassResult] = useState({ label: '', score: 0 }); - const [detections, setDetections] = useState([]); - const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); - const [maskImage, setMaskImage] = useState(null); - const [styledImage, setStyledImage] = useState(null); - const [ocrData, setOcrData] = useState<{ - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; - }>({ detections: [], frameWidth: 1, frameHeight: 1 }); - - // ── Stable callbacks ── - function tick() { - const now = Date.now(); - const diff = now - lastFrameTimeRef.current; - if (diff > 0) setFps(Math.round(1000 / diff)); - lastFrameTimeRef.current = now; - } - - const updateClass = useCallback((r: { label: string; score: number }) => { - setClassResult(r); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateDetections = useCallback( - (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { - setDetections(p.results); - setImageSize({ width: p.imageWidth, height: p.imageHeight }); - tick(); - }, - // eslint-disable-next-line react-hooks/exhaustive-deps - [] - ); - - const updateMask = useCallback((img: SkImage) => { - 
setMaskImage((prev) => { - prev?.dispose(); - return img; - }); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateStyled = useCallback((img: SkImage) => { - setStyledImage((prev) => { - prev?.dispose(); - return img; - }); - tick(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); - - const updateOcr = useCallback( - (d: { - detections: OCRDetection[]; - frameWidth: number; - frameHeight: number; - }) => { - setOcrData(d); - tick(); - }, - // eslint-disable-next-line react-hooks/exhaustive-deps - [] - ); - - // ── runOnJS-wrapped callbacks β€” created on the RN thread so the Babel plugin - // can serialize them into remote functions. These can then be safely called - // from any worklet runtime, including the asyncRunner's worker runtime. - const notifyClass = runOnJS(updateClass); - const notifyDetections = runOnJS(updateDetections); - const notifyMask = runOnJS(updateMask); - const notifyStyled = runOnJS(updateStyled); - const notifyOcr = runOnJS(updateOcr); - - // ── Pull the active model's runOnFrame out of the hook each render. - // These are worklet functions (not plain JS objects), so they CAN be - // captured directly in a useCallback closure β€” the worklets runtime - // serializes them correctly. A new closure is produced whenever the - // active runOnFrame changes, causing useFrameOutput to re-register. - const classRof = classification.runOnFrame; - const detRof = objectDetection.runOnFrame; - const segRof = segmentation.runOnFrame; - const stRof = styleTransfer.runOnFrame; - const ocrRof = ocr.runOnFrame; - - // When switching models: activate kill switch synchronously so the worklet - // thread stops calling runOnFrame before delete() fires on the old model. - // Then re-enable once the new model's preventLoad has taken effect. 
- useEffect(() => { - frameKillSwitch.setBlocking(true); - setMaskImage((prev) => { - prev?.dispose(); - return null; - }); - setStyledImage((prev) => { - prev?.dispose(); - return null; - }); - const id = setTimeout(() => { - frameKillSwitch.setBlocking(false); - }, 300); - return () => clearTimeout(id); - }, [activeModel]); - - // ── Single frame output. - // onFrame is re-created (and re-registered by useFrameOutput) whenever the - // active model or its runOnFrame worklet changes. The kill switch provides - // synchronous cross-thread protection during the transition window. - const frameOutput = useFrameOutput({ - pixelFormat: 'rgb', - dropFramesWhileBusy: true, - onFrame: useCallback( - (frame: Frame) => { - 'worklet'; - - // Kill switch is set synchronously from JS when switching models β€” - // guaranteed visible here before the next frame is dispatched. - if (frameKillSwitch.getDirty()) { - frame.dispose(); - return; - } - - try { - if (activeModel === 'classification') { - if (!classRof) return; - const result = classRof(frame); - if (result) { - let bestLabel = ''; - let bestScore = -1; - const entries = Object.entries(result); - for (let i = 0; i < entries.length; i++) { - const [label, score] = entries[i]!; - if ((score as number) > bestScore) { - bestScore = score as number; - bestLabel = label; - } - } - notifyClass({ - label: bestLabel, - score: bestScore, - }); - } - } else if (activeModel === 'object_detection') { - if (!detRof) return; - const iw = frame.width > frame.height ? frame.height : frame.width; - const ih = frame.width > frame.height ? 
frame.width : frame.height; - const result = detRof(frame, 0.5); - if (result) { - notifyDetections({ - results: result, - imageWidth: iw, - imageHeight: ih, - }); - } - } else if (activeModel === 'segmentation') { - if (!segRof) return; - const result = segRof(frame, [], false); - if (result?.ARGMAX) { - const argmax: Int32Array = result.ARGMAX; - const side = Math.round(Math.sqrt(argmax.length)); - const pixels = new Uint8Array(side * side * 4); - for (let i = 0; i < argmax.length; i++) { - const color = CLASS_COLORS[argmax[i]!] ?? [0, 0, 0, 0]; - pixels[i * 4] = color[0]!; - pixels[i * 4 + 1] = color[1]!; - pixels[i * 4 + 2] = color[2]!; - pixels[i * 4 + 3] = color[3]!; - } - const skData = Skia.Data.fromBytes(pixels); - const img = Skia.Image.MakeImage( - { - width: side, - height: side, - alphaType: AlphaType.Unpremul, - colorType: ColorType.RGBA_8888, - }, - skData, - side * 4 - ); - if (img) notifyMask(img); - } - } else if (activeModel === 'style_transfer') { - if (!stRof) return; - const result = stRof(frame); - if (result?.dataPtr) { - const { dataPtr, sizes } = result; - const h = sizes[0]!; - const w = sizes[1]!; - const skData = Skia.Data.fromBytes(dataPtr); - const img = Skia.Image.MakeImage( - { - width: w, - height: h, - alphaType: AlphaType.Opaque, - colorType: ColorType.RGBA_8888, - }, - skData, - w * 4 - ); - if (img) notifyStyled(img); - } - } else if (activeModel === 'ocr') { - if (!ocrRof) return; - const fw = frame.width; - const fh = frame.height; - const result = ocrRof(frame); - if (result) { - notifyOcr({ - detections: result, - frameWidth: fw, - frameHeight: fh, - }); - } - } - } catch { - // ignore - } finally { - frame.dispose(); - } - }, - [ - activeModel, - classRof, - detRof, - segRof, - stRof, - ocrRof, - notifyClass, - notifyDetections, - notifyMask, - notifyStyled, - notifyOcr, - ] - ), - }); - - // ── Loading state: only care about the active model ── - const activeIsReady = { - classification: classification.isReady, - 
object_detection: objectDetection.isReady, - segmentation: segmentation.isReady, - style_transfer: styleTransfer.isReady, - ocr: ocr.isReady, - }[activeModel]; - - const activeDownloadProgress = { - classification: classification.downloadProgress, - object_detection: objectDetection.downloadProgress, - segmentation: segmentation.downloadProgress, - style_transfer: styleTransfer.downloadProgress, - ocr: ocr.downloadProgress, - }[activeModel]; - - if (!cameraPermission.hasPermission) { - return ( - - Camera access needed - cameraPermission.requestPermission()} - style={styles.button} - > - Grant Permission - - - ); - } - - if (device == null) { - return ( - - No camera device found - - ); - } - - // ── Cover-fit helpers ── - function coverFit(imgW: number, imgH: number) { - const scale = Math.max(canvasSize.width / imgW, canvasSize.height / imgH); - return { - scale, - offsetX: (canvasSize.width - imgW * scale) / 2, - offsetY: (canvasSize.height - imgH * scale) / 2, - }; - } - - // ── OCR coord transform ── - const { - detections: ocrDets, - frameWidth: ocrFW, - frameHeight: ocrFH, - } = ocrData; - const ocrIsLandscape = ocrFW > ocrFH; - const ocrImgW = ocrIsLandscape ? ocrFH : ocrFW; - const ocrImgH = ocrIsLandscape ? ocrFW : ocrFH; - const { - scale: ocrScale, - offsetX: ocrOX, - offsetY: ocrOY, - } = coverFit(ocrImgW, ocrImgH); - function ocrToX(px: number, py: number) { - return (ocrIsLandscape ? ocrFH - py : px) * ocrScale + ocrOX; - } - function ocrToY(px: number, py: number) { - return (ocrIsLandscape ? 
px : py) * ocrScale + ocrOY; - } - - // ── Object detection cover-fit ── - const { - scale: detScale, - offsetX: detOX, - offsetY: detOY, - } = coverFit(imageSize.width, imageSize.height); - - const font = matchFont({ fontFamily: 'Helvetica', fontSize: 11 }); - - return ( - - - - - - {/* ── Overlays ── */} - - setCanvasSize({ - width: e.nativeEvent.layout.width, - height: e.nativeEvent.layout.height, - }) - } - > - {activeModel === 'segmentation' && maskImage && ( - - - - )} - - {activeModel === 'style_transfer' && styledImage && ( - - - - )} - - {activeModel === 'object_detection' && ( - <> - {detections.map((det, i) => { - const left = det.bbox.x1 * detScale + detOX; - const top = det.bbox.y1 * detScale + detOY; - const w = (det.bbox.x2 - det.bbox.x1) * detScale; - const h = (det.bbox.y2 - det.bbox.y1) * detScale; - return ( - - - - {det.label} {(det.score * 100).toFixed(0)}% - - - - ); - })} - - )} - - {activeModel === 'ocr' && ( - - {ocrDets.map((det, i) => { - if (!det.bbox || det.bbox.length < 2) return null; - const path = Skia.Path.Make(); - path.moveTo( - ocrToX(det.bbox[0]!.x, det.bbox[0]!.y), - ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - ); - for (let j = 1; j < det.bbox.length; j++) { - path.lineTo( - ocrToX(det.bbox[j]!.x, det.bbox[j]!.y), - ocrToY(det.bbox[j]!.x, det.bbox[j]!.y) - ); - } - path.close(); - const lx = ocrToX(det.bbox[0]!.x, det.bbox[0]!.y); - const ly = Math.max( - 0, - ocrToY(det.bbox[0]!.x, det.bbox[0]!.y) - 4 - ); - return ( - - - - {font && ( - - )} - - ); - })} - - )} - - - {!activeIsReady && ( - - m.id === activeModel)?.label} ${(activeDownloadProgress * 100).toFixed(0)}%`} - /> - - )} - - - - {MODELS.map((m) => ( - setActiveModel(m.id)} - > - - {m.label} - - - ))} - - - - - - {activeModel === 'classification' && ( - - - {classResult.label || 'β€”'} - - {classResult.label ? 
( - - {(classResult.score * 100).toFixed(1)}% - - ) : null} - - )} - {activeModel === 'object_detection' && ( - - {detections.length} - objects - - )} - {activeModel === 'segmentation' && ( - - DeepLab V3 - segmentation - - )} - {activeModel === 'style_transfer' && ( - - Rain Princess - style - - )} - {activeModel === 'ocr' && ( - - {ocrDets.length} - regions - - )} - - - {fps} - fps - - - - - ); -} - -// ─── Styles ────────────────────────────────────────────────────────────────── - -const styles = StyleSheet.create({ - container: { flex: 1, backgroundColor: 'black' }, - centered: { - flex: 1, - backgroundColor: 'black', - justifyContent: 'center', - alignItems: 'center', - gap: 16, - }, - message: { color: 'white', fontSize: 18 }, - button: { - paddingHorizontal: 24, - paddingVertical: 12, - backgroundColor: ColorPalette.primary, - borderRadius: 24, - }, - buttonText: { color: 'white', fontSize: 15, fontWeight: '600' }, - loadingOverlay: { - ...StyleSheet.absoluteFillObject, - backgroundColor: 'rgba(0,0,0,0.6)', - justifyContent: 'center', - alignItems: 'center', - }, - topBarWrapper: { - position: 'absolute', - top: 0, - left: 0, - right: 0, - }, - pickerContent: { - paddingHorizontal: 12, - gap: 8, - }, - chip: { - paddingHorizontal: 16, - paddingVertical: 8, - borderRadius: 20, - backgroundColor: 'rgba(0,0,0,0.55)', - borderWidth: 1, - borderColor: 'rgba(255,255,255,0.2)', - }, - chipActive: { - backgroundColor: ColorPalette.primary, - borderColor: ColorPalette.primary, - }, - chipText: { - color: 'rgba(255,255,255,0.8)', - fontSize: 13, - fontWeight: '600', - }, - chipTextActive: { color: 'white' }, - bbox: { - position: 'absolute', - borderWidth: 2, - borderColor: ColorPalette.primary, - borderRadius: 4, - }, - bboxLabel: { - position: 'absolute', - top: -22, - left: -2, - backgroundColor: ColorPalette.primary, - paddingHorizontal: 6, - paddingVertical: 2, - borderRadius: 4, - }, - bboxLabelText: { color: 'white', fontSize: 11, fontWeight: '600' }, - 
bottomBarWrapper: { - position: 'absolute', - bottom: 0, - left: 0, - right: 0, - alignItems: 'center', - }, - bottomBar: { - flexDirection: 'row', - alignItems: 'center', - backgroundColor: 'rgba(0,0,0,0.55)', - borderRadius: 24, - paddingHorizontal: 28, - paddingVertical: 10, - gap: 24, - }, - resultContainer: { alignItems: 'flex-start', maxWidth: 220 }, - resultText: { - color: 'white', - fontSize: 16, - fontWeight: '700', - }, - resultSub: { - color: 'rgba(255,255,255,0.6)', - fontSize: 12, - fontWeight: '500', - }, - statDivider: { - width: 1, - height: 32, - backgroundColor: 'rgba(255,255,255,0.2)', - }, - statItem: { alignItems: 'center' }, - statValue: { - color: 'white', - fontSize: 22, - fontWeight: '700', - letterSpacing: -0.5, - }, - statLabel: { - color: 'rgba(255,255,255,0.55)', - fontSize: 11, - fontWeight: '500', - textTransform: 'uppercase', - letterSpacing: 0.8, - }, -}); From 224cbbf20fd510e2210b77ddea7746e5d4cd4d10 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 11:53:27 +0100 Subject: [PATCH 34/37] fix: drawing style transfer image --- .../app/style_transfer/index.tsx | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index 466900a6f..f7a7022b1 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -27,6 +27,7 @@ export default function StyleTransferScreen() { const [imageUri, setImageUri] = useState(''); const [styledImage, setStyledImage] = useState(null); + const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); const handleCameraPress = async (isCamera: boolean) => { const image = await getImage(isCamera); @@ -43,16 +44,8 @@ export default function StyleTransferScreen() { const output = await model.forward(imageUri); const height = output.sizes[0]; const width = output.sizes[1]; - // Convert RGB -> 
RGBA for Skia - const rgba = new Uint8Array(width * height * 4); - const rgb = output.dataPtr; - for (let i = 0; i < width * height; i++) { - rgba[i * 4] = rgb[i * 3]; - rgba[i * 4 + 1] = rgb[i * 3 + 1]; - rgba[i * 4 + 2] = rgb[i * 3 + 2]; - rgba[i * 4 + 3] = 255; - } - const skData = Skia.Data.fromBytes(rgba); + // Native already returns RGBA uint8 β€” use directly + const skData = Skia.Data.fromBytes(output.dataPtr); const img = Skia.Image.MakeImage( { width, @@ -83,16 +76,26 @@ export default function StyleTransferScreen() { {styledImage ? ( - - - + + setCanvasSize({ + width: e.nativeEvent.layout.width, + height: e.nativeEvent.layout.height, + }) + } + > + + + + ) : ( Date: Thu, 26 Feb 2026 13:07:48 +0100 Subject: [PATCH 35/37] fix: tests --- .../app/style_transfer/index.tsx | 1 - .../common/rnexecutorch/tests/CMakeLists.txt | 23 +++++++++++++++---- .../tests/integration/StyleTransferTest.cpp | 8 ++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx index f7a7022b1..80c3974d4 100644 --- a/apps/computer-vision/app/style_transfer/index.tsx +++ b/apps/computer-vision/app/style_transfer/index.tsx @@ -44,7 +44,6 @@ export default function StyleTransferScreen() { const output = await model.forward(imageUri); const height = output.sizes[0]; const width = output.sizes[1]; - // Native already returns RGBA uint8 β€” use directly const skData = Skia.Data.fromBytes(output.dataPtr); const img = Skia.Image.MakeImage( { diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index c45ab9107..79c0b3129 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -148,8 +148,11 @@ add_rn_test(BaseModelTests integration/BaseModelTest.cpp) 
add_rn_test(ClassificationTests integration/ClassificationTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/classification/Classification.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp @@ -167,8 +170,11 @@ add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/embeddings/image/ImageEmbeddings.cpp ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp @@ -182,8 +188,11 @@ add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp add_rn_test(StyleTransferTests integration/StyleTransferTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/style_transfer/StyleTransfer.cpp + ${RNEXECUTORCH_DIR}/models/VisionModel.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VADTests integration/VoiceActivityDetectionTest.cpp @@ -244,8 +253,10 @@ add_rn_test(OCRTests integration/OCRTest.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp @@ -258,6 +269,8 @@ add_rn_test(VerticalOCRTests integration/VerticalOCRTest.cpp 
${RNEXECUTORCH_DIR}/models/ocr/utils/DetectorUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognitionHandlerUtils.cpp ${RNEXECUTORCH_DIR}/models/ocr/utils/RecognizerUtils.cpp + ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp + ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp ${IMAGE_UTILS_SOURCES} - LIBS opencv_deps + LIBS opencv_deps android ) diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp index 5fbf798b6..5d300de83 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/StyleTransferTest.cpp @@ -60,7 +60,9 @@ TEST(StyleTransferGenerateTests, MalformedURIThrows) { TEST(StyleTransferGenerateTests, ValidImageReturnsNonNull) { StyleTransfer model(kValidStyleTransferModelPath, nullptr); auto result = model.generateFromString(kValidTestImagePath); - EXPECT_NE(result, nullptr); + EXPECT_NE(result.dataPtr, nullptr); + EXPECT_GT(result.width, 0); + EXPECT_GT(result.height, 0); } TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { @@ -68,8 +70,8 @@ TEST(StyleTransferGenerateTests, MultipleGeneratesWork) { EXPECT_NO_THROW((void)model.generateFromString(kValidTestImagePath)); auto result1 = model.generateFromString(kValidTestImagePath); auto result2 = model.generateFromString(kValidTestImagePath); - EXPECT_NE(result1, nullptr); - EXPECT_NE(result2, nullptr); + EXPECT_NE(result1.dataPtr, nullptr); + EXPECT_NE(result2.dataPtr, nullptr); } TEST(StyleTransferInheritedTests, GetInputShapeWorks) { From a06a8b5439db0addaeee12794035ee01f1757131 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 13:52:30 +0100 Subject: [PATCH 36/37] feat: add possibility to switch between front/back camera --- .../app/vision_camera/index.tsx | 63 ++++++++++++++++++- 
.../rnexecutorch/models/VisionModel.cpp | 11 +--- 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index 625018849..ccf8e41d6 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -42,6 +42,7 @@ import { Skia, SkImage, } from '@shopify/react-native-skia'; +import Svg, { Path, Polygon } from 'react-native-svg'; import { GeneratingContext } from '../../context'; import Spinner from '../../components/Spinner'; import ColorPalette from '../../colors'; @@ -119,6 +120,9 @@ export default function VisionCameraScreen() { const [activeTask, setActiveTask] = useState('classification'); const [activeModel, setActiveModel] = useState('classification'); const [canvasSize, setCanvasSize] = useState({ width: 1, height: 1 }); + const [cameraPosition, setCameraPosition] = useState<'back' | 'front'>( + 'back' + ); const { setGlobalGenerating } = useContext(GeneratingContext); const classification = useClassification({ @@ -149,7 +153,8 @@ export default function VisionCameraScreen() { const lastFrameTimeRef = useRef(Date.now()); const cameraPermission = useCameraPermission(); const devices = useCameraDevices(); - const device = devices.find((d) => d.position === 'back') ?? devices[0]; + const device = + devices.find((d) => d.position === cameraPosition) ?? devices[0]; const format = useMemo(() => { if (device == null) return undefined; try { @@ -375,7 +380,10 @@ export default function VisionCameraScreen() { /> setCanvasSize({ @@ -422,6 +430,9 @@ export default function VisionCameraScreen() { style={[ styles.bboxLabel, { backgroundColor: labelColorBg(det.label) }, + cameraPosition === 'front' && { + transform: [{ scaleX: -1 }], + }, ]} > @@ -518,6 +529,37 @@ export default function VisionCameraScreen() { ))} + + + + setCameraPosition((p) => (p === 'back' ? 
'front' : 'back')) + } + > + + {/* Camera body */} + + {/* Rotate arrows β€” arc with arrowhead around the lens */} + + + + + ); } @@ -662,4 +704,21 @@ const styles = StyleSheet.create({ textShadowOffset: { width: 0, height: 1 }, textShadowRadius: 6, }, + bottomOverlay: { + position: 'absolute', + bottom: 0, + left: 0, + right: 0, + alignItems: 'center', + }, + flipButton: { + width: 56, + height: 56, + borderRadius: 28, + backgroundColor: 'rgba(255,255,255,0.2)', + justifyContent: 'center', + alignItems: 'center', + borderWidth: 1.5, + borderColor: 'rgba(255,255,255,0.4)', + }, }); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp index 8f67175c4..c0ce049f2 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp @@ -11,16 +11,7 @@ using namespace facebook; cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) const { auto frameObj = frameData.asObject(runtime); - cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj); - - // Camera sensors natively deliver frames in landscape orientation. - // Rotate 90Β° CW so all models receive upright portrait frames. 
- if (frame.cols > frame.rows) { - cv::Mat upright; - cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE); - return upright; - } - return frame; + return ::rnexecutorch::utils::extractFrame(runtime, frameObj); } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const { From 787ea7db1226c3e9070271a6521f395b907eaccd Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Thu, 26 Feb 2026 15:49:29 +0100 Subject: [PATCH 37/37] fix: rotation issue --- apps/computer-vision/app/vision_camera/index.tsx | 1 + .../common/rnexecutorch/models/VisionModel.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index ccf8e41d6..e09bdcc17 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -377,6 +377,7 @@ export default function VisionCameraScreen() { outputs={[frameOutput]} isActive={true} format={format} + orientationSource="interface" /> frame.rows) { + cv::Mat upright; + cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE); + return upright; + } + return frame; } cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {