diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
index 2238f7142..6b23cdc46 100644
--- a/.cspell-wordlist.txt
+++ b/.cspell-wordlist.txt
@@ -127,3 +127,4 @@ detr
metaprogramming
ktlint
lefthook
+espeak
\ No newline at end of file
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md b/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
index b52726c9e..10e9986de 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextToSpeech.md
@@ -82,17 +82,24 @@ You need more details? Check the following resources:
## Running the model
-The module provides two ways to generate speech:
+The module provides two ways to generate speech using either raw text or pre-generated phonemes:
-1. [**`forward(text, speed)`**](../../06-api-reference/interfaces/TextToSpeechType.md#forward): Generates the complete audio waveform at once. Returns a promise resolving to a `Float32Array`.
+### Using Text
+
+1. [**`forward({ text, speed })`**](../../06-api-reference/interfaces/TextToSpeechType.md#forward): Generates the complete audio waveform at once. Returns a promise resolving to a `Float32Array`.
+2. [**`stream({ text, speed, onNext, ... })`**](../../06-api-reference/interfaces/TextToSpeechType.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences.
+
+### Using Phonemes
+
+If you have pre-computed phonemes (e.g., from an external dictionary or a custom G2P model), you can skip the internal phoneme generation step:
+
+1. [**`forwardFromPhonemes({ phonemes, speed })`**](../../06-api-reference/interfaces/TextToSpeechType.md#forwardfromphonemes): Generates the complete audio waveform from a phoneme string.
+2. [**`streamFromPhonemes({ phonemes, speed, onNext, ... })`**](../../06-api-reference/interfaces/TextToSpeechType.md#streamfromphonemes): Streams audio chunks generated from a phoneme string.
:::note
-Since it processes the entire text at once, it might take a significant amount of time to produce an audio for long text inputs.
+Since `forward` and `forwardFromPhonemes` process the entire input at once, they might take a significant amount of time to produce audio for long inputs.
:::
-2. [**`stream({ text, speed })`**](../../06-api-reference/interfaces/TextToSpeechType.md#stream): An async generator that yields chunks of audio as they are computed.
- This is ideal for reducing the "time to first audio" for long sentences.
-
## Example
### Speech Synthesis
@@ -185,6 +192,48 @@ export default function App() {
}
```
+### Synthesis from Phonemes
+
+If you already have a phoneme string obtained from an external source (e.g. the Python `phonemizer` library,
+`espeak-ng`, or any custom phonemizer), you can use `forwardFromPhonemes` or `streamFromPhonemes` to synthesize audio directly, skipping the phoneme generation stage.
+
+```tsx
+import React from 'react';
+import { Button, View } from 'react-native';
+import {
+ useTextToSpeech,
+ KOKORO_MEDIUM,
+ KOKORO_VOICE_AF_HEART,
+} from 'react-native-executorch';
+
+export default function App() {
+ const tts = useTextToSpeech({
+ model: KOKORO_MEDIUM,
+ voice: KOKORO_VOICE_AF_HEART,
+ });
+
+ const synthesizePhonemes = async () => {
+    // Example phonemes for "A man who doesn't trust himself, can never really trust anyone else."
+ const audioData = await tts.forwardFromPhonemes({
+ phonemes:
+ 'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.',
+ });
+
+ // ... process or play audioData ...
+ };
+
+  return (
+    <View>
+      <Button title="Synthesize" onPress={synthesizePhonemes} />
+    </View>
+  );
+}
+```
+
## Supported models
| Model | Language |
diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
index bc297ecf4..53bde1685 100644
--- a/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
+++ b/docs/docs/04-typescript-api/01-natural-language-processing/TextToSpeechModule.md
@@ -53,16 +53,24 @@ For more information on resource sources, see [loading models](../../01-fundamen
## Running the model
-The module provides two ways to generate speech:
+The module provides two ways to generate speech using either raw text or pre-generated phonemes:
+
+### Using Text
1. [**`forward(text, speed)`**](../../06-api-reference/classes/TextToSpeechModule.md#forward): Generates the complete audio waveform at once. Returns a promise resolving to a `Float32Array`.
+2. [**`stream({ text, speed })`**](../../06-api-reference/classes/TextToSpeechModule.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences.
+
+### Using Phonemes
+
+If you have pre-computed phonemes (e.g., from an external dictionary or a custom G2P model), you can skip the internal phoneme generation step:
+
+1. [**`forwardFromPhonemes(phonemes, speed)`**](../../06-api-reference/classes/TextToSpeechModule.md#forwardfromphonemes): Generates the complete audio waveform from a phoneme string.
+2. [**`streamFromPhonemes({ phonemes, speed })`**](../../06-api-reference/classes/TextToSpeechModule.md#streamfromphonemes): Streams audio chunks generated from a phoneme string.
:::note
-Since it processes the entire text at once, it might take a significant amount of time to produce an audio for long text inputs.
+Since `forward` and `forwardFromPhonemes` process the entire input at once, they might take a significant amount of time to produce audio for long inputs.
:::
-2. [**`stream({ text, speed })`**](../../06-api-reference/classes/TextToSpeechModule.md#stream): An async generator that yields chunks of audio as they are computed. This is ideal for reducing the "time to first audio" for long sentences.
-
## Example
### Speech Synthesis
@@ -135,3 +143,34 @@ try {
console.error('Streaming failed:', error);
}
```
+
+### Synthesis from Phonemes
+
+If you already have a phoneme string (e.g., from an external library), you can use `forwardFromPhonemes` or `streamFromPhonemes` to synthesize audio directly, skipping the internal phonemizer stage.
+
+```typescript
+import {
+ TextToSpeechModule,
+ KOKORO_MEDIUM,
+ KOKORO_VOICE_AF_HEART,
+} from 'react-native-executorch';
+
+const tts = new TextToSpeechModule();
+
+await tts.load({
+ model: KOKORO_MEDIUM,
+ voice: KOKORO_VOICE_AF_HEART,
+});
+
+// Example phonemes for "Hello world!"
+const waveform = await tts.forwardFromPhonemes('həlˈO wˈɜɹld!', 1.0);
+
+// Or stream from phonemes
+for await (const chunk of tts.streamFromPhonemes({
+ phonemes:
+ 'ɐ mˈæn hˌu dˈʌzᵊnt tɹˈʌst hɪmsˈɛlf, kæn nˈɛvəɹ ɹˈiᵊli tɹˈʌst ˈɛniwˌʌn ˈɛls.',
+ speed: 1.0,
+})) {
+ // ... process chunk ...
+}
+```
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
index d6489c9be..7ece18a93 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
@@ -169,6 +169,14 @@ template class ModelHostObject : public JsiHostObject {
addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject,
promiseHostFunction<&Model::stream>,
"stream"));
+ addFunctions(JSI_EXPORT_FUNCTION(
+ ModelHostObject,
+ promiseHostFunction<&Model::generateFromPhonemes>,
+ "generateFromPhonemes"));
+ addFunctions(JSI_EXPORT_FUNCTION(
+ ModelHostObject,
+ promiseHostFunction<&Model::streamFromPhonemes>,
+ "streamFromPhonemes"));
}
if constexpr (meta::HasGenerateFromString<Model>) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
index d73fb6205..52da0fc46 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.cpp
@@ -4,6 +4,7 @@
#include
#include
+#include
#include
#include
@@ -73,16 +74,9 @@ void Kokoro::loadVoice(const std::string &voiceSource) {
}
}
-std::vector<float> Kokoro::generate(std::string text, float speed) {
- if (text.size() > params::kMaxTextSize) {
- throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
- "Kokoro: maximum input text size exceeded");
- }
-
- // G2P (Grapheme to Phoneme) conversion
- auto phonemes = phonemizer_.process(text);
-
- // Divide the phonemes string intro substrings.
+std::vector<float>
+Kokoro::generateFromPhonemesImpl(const std::u32string &phonemes, float speed) {
+ // Divide the phonemes string into substrings.
// Affects the further calculations only in case of string size
// exceeding the biggest model's input.
auto subsentences =
@@ -98,26 +92,20 @@ std::vector Kokoro::generate(std::string text, float speed) {
size_t pauseMs = params::kPauseValues.contains(lastPhoneme)
? params::kPauseValues.at(lastPhoneme)
: params::kDefaultPause;
-  std::vector<float> pause(pauseMs * constants::kSamplesPerMilisecond, 0.F);
- // Add audio part and pause to the main audio vector
+ // Add audio part and silence pause to the main audio vector
audio.insert(audio.end(), std::make_move_iterator(audioPart.begin()),
std::make_move_iterator(audioPart.end()));
- audio.insert(audio.end(), std::make_move_iterator(pause.begin()),
- std::make_move_iterator(pause.end()));
+ audio.resize(audio.size() + pauseMs * constants::kSamplesPerMilisecond,
+ 0.F);
}
return audio;
}
-void Kokoro::stream(std::string text, float speed,
- std::shared_ptr callback) {
- if (text.size() > params::kMaxTextSize) {
- throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
- "Kokoro: maximum input text size exceeded");
- }
-
- // Build a full callback function
+void Kokoro::streamFromPhonemesImpl(
+ const std::u32string &phonemes, float speed,
+ std::shared_ptr callback) {
  auto nativeCallback = [this, callback](const std::vector<float> &audioVec) {
if (this->isStreaming_) {
this->callInvoker_->invokeAsync([callback, audioVec](jsi::Runtime &rt) {
@@ -127,21 +115,12 @@ void Kokoro::stream(std::string text, float speed,
}
};
- // Mark the beginning of the streaming process
isStreaming_ = true;
- // G2P (Grapheme to Phoneme) conversion
- auto phonemes = phonemizer_.process(text);
-
- // Divide the phonemes string intro substrings.
- // Use specialized implementation to minimize the latency between the
- // sentences.
+ // Use LATENCY strategy to minimize the time-to-first-audio for streaming
auto subsentences =
partitioner_.divide(phonemes);
- // We follow the implementation of generate() method, but
- // instead of accumulating results in a vector, we push them
- // back to the JS side with the callback.
for (size_t i = 0; i < subsentences.size(); i++) {
if (!isStreaming_) {
break;
@@ -151,7 +130,7 @@ void Kokoro::stream(std::string text, float speed,
// Determine the silent padding duration to be stripped from the edges of
// the generated audio. If a chunk ends with a space or follows one that
- // did, it indicates a word boundary split – we use a shorter padding (20ms)
+ // did, it indicates a word boundary split – we use a shorter padding
// to ensure natural speech flow. Otherwise, we use 50ms for standard
// pauses.
bool endsWithSpace = (subsentence.back() == U' ');
@@ -161,25 +140,67 @@ void Kokoro::stream(std::string text, float speed,
// Generate an audio vector with the Kokoro model
auto audioPart = synthesize(subsentence, speed, paddingMs);
- // Calculate a pause between the sentences
+ // Calculate and append a pause between the sentences
char32_t lastPhoneme = subsentence.back();
size_t pauseMs = params::kPauseValues.contains(lastPhoneme)
? params::kPauseValues.at(lastPhoneme)
: params::kDefaultPause;
-    std::vector<float> pause(pauseMs * constants::kSamplesPerMilisecond, 0.F);
-
- // Add pause to the audio vector
- audioPart.insert(audioPart.end(), std::make_move_iterator(pause.begin()),
- std::make_move_iterator(pause.end()));
+ audioPart.resize(
+ audioPart.size() + pauseMs * constants::kSamplesPerMilisecond, 0.F);
// Push the audio right away to the JS side
nativeCallback(audioPart);
}
- // Mark the end of the streaming process
isStreaming_ = false;
}
+std::vector<float> Kokoro::generate(std::string text, float speed) {
+ if (text.size() > params::kMaxTextSize) {
+ throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+ "Kokoro: maximum input text size exceeded");
+ }
+
+ // G2P (Grapheme to Phoneme) conversion
+ auto phonemes = phonemizer_.process(text);
+
+ return generateFromPhonemesImpl(phonemes, speed);
+}
+
+std::vector<float> Kokoro::generateFromPhonemes(std::string phonemes,
+                                                float speed) {
+ if (phonemes.empty()) {
+ throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+ "Kokoro: phoneme string must not be empty");
+ }
+ return generateFromPhonemesImpl(
+ phonemis::utilities::string_utils::utf8_to_u32string(phonemes), speed);
+}
+
+void Kokoro::stream(std::string text, float speed,
+ std::shared_ptr callback) {
+ if (text.size() > params::kMaxTextSize) {
+ throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+ "Kokoro: maximum input text size exceeded");
+ }
+
+ // G2P (Grapheme to Phoneme) conversion
+ auto phonemes = phonemizer_.process(text);
+
+ streamFromPhonemesImpl(phonemes, speed, callback);
+}
+
+void Kokoro::streamFromPhonemes(std::string phonemes, float speed,
+ std::shared_ptr callback) {
+ if (phonemes.empty()) {
+ throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+ "Kokoro: phoneme string must not be empty");
+ }
+ streamFromPhonemesImpl(
+ phonemis::utilities::string_utils::utf8_to_u32string(phonemes), speed,
+ callback);
+}
+
void Kokoro::streamStop() noexcept { isStreaming_ = false; }
std::vector<float> Kokoro::synthesize(const std::u32string &phonemes,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
index f27ba8018..d7a4c2ae6 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_speech/kokoro/Kokoro.h
@@ -27,11 +27,22 @@ class Kokoro {
// Processes the entire text at once, before sending back to the JS side.
std::vector<float> generate(std::string text, float speed = 1.F);
+ // Accepts pre-computed phonemes (as a UTF-8 IPA string) and synthesizes
+ // audio, bypassing the built-in phonemizer. This allows callers to use
+ // an external G2P system (e.g. the Python `phonemizer` library, espeak-ng,
+ // or any custom phonemizer).
+  std::vector<float> generateFromPhonemes(std::string phonemes,
+                                          float speed = 1.F);
+
// Processes text in chunks, sending each chunk individualy to the JS side
// with asynchronous callbacks.
void stream(std::string text, float speed,
std::shared_ptr callback);
+ // Streaming variant that accepts pre-computed phonemes instead of text.
+ void streamFromPhonemes(std::string phonemes, float speed,
+ std::shared_ptr callback);
+
// Stops the streaming process
void streamStop() noexcept;
@@ -42,6 +53,12 @@ class Kokoro {
// Helper function - loading voice array
void loadVoice(const std::string &voiceSource);
+ // Helper function - shared synthesis pipeline (partition + synthesize)
+  std::vector<float> generateFromPhonemesImpl(const std::u32string &phonemes,
+                                              float speed);
+ void streamFromPhonemesImpl(const std::u32string &phonemes, float speed,
+ std::shared_ptr callback);
+
// Helper function - generate specialization for given input size
std::vector<float> synthesize(const std::u32string &phonemes, float speed,
size_t paddingMs = 50);
diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
index b29b4bc8d..1a751f42d 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextToSpeech.ts
@@ -3,8 +3,11 @@ import { TextToSpeechModule } from '../../modules/natural_language_processing/Te
import {
TextToSpeechProps,
TextToSpeechInput,
+ TextToSpeechPhonemeInput,
TextToSpeechType,
+ TextToSpeechStreamingCallbacks,
TextToSpeechStreamingInput,
+ TextToSpeechStreamingPhonemeInput,
} from '../../types/tts';
import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
import { RnExecutorchError, parseUnknownError } from '../../errors/errorUtils';
@@ -62,17 +65,47 @@ export const useTextToSpeech = ({
preventLoad,
]);
- const forward = async (input: TextToSpeechInput) => {
+ // Shared guard for all generation methods
+ const guardReady = (methodName: string) => {
if (!isReady)
throw new RnExecutorchError(
RnExecutorchErrorCode.ModuleNotLoaded,
- 'The model is currently not loaded. Please load the model before calling forward().'
+ `The model is currently not loaded. Please load the model before calling ${methodName}().`
);
if (isGenerating)
throw new RnExecutorchError(
RnExecutorchErrorCode.ModelGenerating,
'The model is currently generating. Please wait until previous model run is complete.'
);
+ };
+
+ // Shared streaming orchestration (guards + onBegin/onNext/onEnd lifecycle)
+ const runStream = useCallback(
+ async (
+ methodName: string,
+      generator: AsyncGenerator<Float32Array>,
+ callbacks: TextToSpeechStreamingCallbacks
+ ) => {
+ guardReady(methodName);
+ setIsGenerating(true);
+ try {
+ await callbacks.onBegin?.();
+ for await (const audio of generator) {
+ if (callbacks.onNext) {
+ await callbacks.onNext(audio);
+ }
+ }
+ } finally {
+ await callbacks.onEnd?.();
+ setIsGenerating(false);
+ }
+ },
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ [isReady, isGenerating, moduleInstance]
+ );
+
+ const forward = async (input: TextToSpeechInput) => {
+ guardReady('forward');
try {
setIsGenerating(true);
return await moduleInstance.forward(input.text, input.speed ?? 1.0);
@@ -81,35 +114,42 @@ export const useTextToSpeech = ({
}
};
+ const forwardFromPhonemes = async (input: TextToSpeechPhonemeInput) => {
+ guardReady('forwardFromPhonemes');
+ try {
+ setIsGenerating(true);
+ return await moduleInstance.forwardFromPhonemes(
+ input.phonemes,
+ input.speed ?? 1.0
+ );
+ } finally {
+ setIsGenerating(false);
+ }
+ };
+
const stream = useCallback(
async (input: TextToSpeechStreamingInput) => {
- if (!isReady)
- throw new RnExecutorchError(
- RnExecutorchErrorCode.ModuleNotLoaded,
- 'The model is currently not loaded. Please load the model before calling stream().'
- );
- if (isGenerating)
- throw new RnExecutorchError(
- RnExecutorchErrorCode.ModelGenerating,
- 'The model is currently generating. Please wait until previous model run is complete.'
- );
- setIsGenerating(true);
- try {
- await input.onBegin?.();
- for await (const audio of moduleInstance.stream({
- text: input.text,
+ await runStream(
+ 'stream',
+ moduleInstance.stream({ text: input.text, speed: input.speed ?? 1.0 }),
+ input
+ );
+ },
+ [runStream, moduleInstance]
+ );
+
+ const streamFromPhonemes = useCallback(
+ async (input: TextToSpeechStreamingPhonemeInput) => {
+ await runStream(
+ 'streamFromPhonemes',
+ moduleInstance.streamFromPhonemes({
+ phonemes: input.phonemes,
speed: input.speed ?? 1.0,
- })) {
- if (input.onNext) {
- await input.onNext(audio);
- }
- }
- } finally {
- await input.onEnd?.();
- setIsGenerating(false);
- }
+ }),
+ input
+ );
},
- [isReady, isGenerating, moduleInstance]
+ [runStream, moduleInstance]
);
return {
@@ -117,7 +157,9 @@ export const useTextToSpeech = ({
isReady,
isGenerating,
forward,
+ forwardFromPhonemes,
stream,
+ streamFromPhonemes,
streamStop: moduleInstance.streamStop,
downloadProgress,
};
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
index 849c25676..932f166e7 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextToSpeechModule.ts
@@ -5,6 +5,7 @@ import {
KokoroConfig,
TextToSpeechConfig,
TextToSpeechStreamingInput,
+ TextToSpeechStreamingPhonemeInput,
VoiceConfig,
} from '../../types/tts';
import { Logger } from '../../common/Logger';
@@ -98,6 +99,14 @@ export class TextToSpeechModule {
}
}
+ private ensureLoaded(methodName: string): void {
+ if (this.nativeModule == null)
+ throw new RnExecutorchError(
+ RnExecutorchErrorCode.ModuleNotLoaded,
+ `The model is currently not loaded. Please load the model before calling ${methodName}().`
+ );
+ }
+
/**
* Synthesizes the provided text into speech.
* Returns a promise that resolves to the full audio waveform as a `Float32Array`.
@@ -110,25 +119,34 @@ export class TextToSpeechModule {
text: string,
speed: number = 1.0
  ): Promise<Float32Array> {
- if (this.nativeModule == null)
- throw new RnExecutorchError(
- RnExecutorchErrorCode.ModuleNotLoaded,
- 'The model is currently not loaded. Please load the model before calling forward().'
- );
+ this.ensureLoaded('forward');
return await this.nativeModule.generate(text, speed);
}
/**
- * Starts a streaming synthesis session. Yields audio chunks as they are generated.
+ * Synthesizes pre-computed phonemes into speech, bypassing the built-in phonemizer.
+ * This allows using an external G2P system (e.g. the Python `phonemizer` library,
+ * espeak-ng, or any custom phonemizer).
*
- * @param input - Input object containing text and optional speed.
- * @returns An async generator yielding Float32Array audio chunks.
+ * @param phonemes The pre-computed IPA phoneme string.
+ * @param speed Optional speed multiplier for the speech synthesis (default is 1.0).
+ * @returns A promise resolving to the synthesized audio waveform.
*/
- public async *stream({
- text,
- speed,
-  }: TextToSpeechStreamingInput): AsyncGenerator<Float32Array> {
- // Stores computed audio segments
+ public async forwardFromPhonemes(
+ phonemes: string,
+ speed: number = 1.0
+  ): Promise<Float32Array> {
+ this.ensureLoaded('forwardFromPhonemes');
+ return await this.nativeModule.generateFromPhonemes(phonemes, speed);
+ }
+
+ /**
+ * Shared streaming implementation. Wraps a native streaming call in an
+ * async generator that yields Float32Array audio chunks as they arrive.
+ */
+ private async *streamImpl(
+    nativeCall: (cb: (audio: number[]) => void) => Promise<void>
+  ): AsyncGenerator<Float32Array> {
const queue: Float32Array[] = [];
let waiter: (() => void) | null = null;
@@ -142,7 +160,7 @@ export class TextToSpeechModule {
(async () => {
try {
- await this.nativeModule.stream(text, speed, (audio: number[]) => {
+ await nativeCall((audio: number[]) => {
queue.push(new Float32Array(audio));
wake();
});
@@ -169,6 +187,35 @@ export class TextToSpeechModule {
}
}
+ /**
+ * Starts a streaming synthesis session. Yields audio chunks as they are generated.
+ *
+ * @param input - Input object containing text and optional speed.
+ * @returns An async generator yielding Float32Array audio chunks.
+ */
+ public async *stream({
+ text,
+ speed,
+  }: TextToSpeechStreamingInput): AsyncGenerator<Float32Array> {
+ yield* this.streamImpl((cb) => this.nativeModule.stream(text, speed, cb));
+ }
+
+ /**
+ * Starts a streaming synthesis session from pre-computed phonemes.
+ * Bypasses the built-in phonemizer, allowing use of external G2P systems.
+ *
+ * @param input - Input object containing phonemes and optional speed.
+ * @returns An async generator yielding Float32Array audio chunks.
+ */
+ public async *streamFromPhonemes({
+ phonemes,
+ speed,
+  }: TextToSpeechStreamingPhonemeInput): AsyncGenerator<Float32Array> {
+ yield* this.streamImpl((cb) =>
+ this.nativeModule.streamFromPhonemes(phonemes, speed, cb)
+ );
+ }
+
/**
* Stops the streaming process if there is any ongoing.
*/
diff --git a/packages/react-native-executorch/src/types/tts.ts b/packages/react-native-executorch/src/types/tts.ts
index 55937be49..ebc4b065a 100644
--- a/packages/react-native-executorch/src/types/tts.ts
+++ b/packages/react-native-executorch/src/types/tts.ts
@@ -90,6 +90,21 @@ export interface TextToSpeechInput {
speed?: number;
}
+/**
+ * Text to Speech module input for pre-computed phonemes.
+ * Use this when you have your own phonemizer (e.g. the Python `phonemizer`
+ * library, espeak-ng, or any custom G2P system) and want to bypass the
+ * built-in phonemizer pipeline.
+ *
+ * @category Types
+ * @property {string} phonemes - pre-computed IPA phoneme string
+ * @property {number} [speed] - optional speed argument - the higher it is, the faster the speech becomes
+ */
+export interface TextToSpeechPhonemeInput {
+ phonemes: string;
+ speed?: number;
+}
+
/**
* Return type for the `useTextToSpeech` hook.
* Manages the state and operations for Text-to-Speech generation.
@@ -125,6 +140,18 @@ export interface TextToSpeechType {
*/
  forward: (input: TextToSpeechInput) => Promise<Float32Array>;
+ /**
+ * Synthesizes pre-computed phonemes into speech audio in a single pass.
+ * Bypasses the built-in phonemizer, allowing use of external G2P systems.
+ *
+ * @param input - The `TextToSpeechPhonemeInput` object containing pre-computed `phonemes` and optional `speed`.
+ * @returns A Promise that resolves with the generated audio data.
+ * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
+ */
+ forwardFromPhonemes: (
+ input: TextToSpeechPhonemeInput
+  ) => Promise<Float32Array>;
+
/**
* Streams the generated audio data incrementally.
* This is optimal for real-time playback, allowing audio to start playing before the full text is synthesized.
@@ -134,6 +161,17 @@ export interface TextToSpeechType {
*/
  stream: (input: TextToSpeechStreamingInput) => Promise<void>;
+ /**
+ * Streams pre-computed phonemes incrementally, bypassing the built-in phonemizer.
+ *
+ * @param input - The streaming input with pre-computed `phonemes` instead of `text`.
+ * @returns A Promise that resolves when the streaming process is complete.
+ * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
+ */
+ streamFromPhonemes: (
+ input: TextToSpeechStreamingPhonemeInput
+  ) => Promise<void>;
+
/**
* Interrupts and stops the currently active audio generation stream.
*/
@@ -141,20 +179,37 @@ export interface TextToSpeechType {
}
/**
- * Text to Speech streaming input definition
- *
- * Streaming mode in T2S is synchronized by passing specific callbacks
- * executed at given moments of the streaming.
- * Actions such as playing the audio should happen within the onNext callback.
- * Callbacks can be both synchronous or asynchronous.
+ * Shared streaming lifecycle callbacks for TTS streaming modes.
*
* @category Types
 * @property {() => void | Promise<void>} [onBegin] - Called when streaming begins
 * @property {(audio: Float32Array) => void | Promise<void>} [onNext] - Called after each audio chunk gets calculated.
 * @property {() => void | Promise<void>} [onEnd] - Called when streaming ends
*/
-export interface TextToSpeechStreamingInput extends TextToSpeechInput {
+export interface TextToSpeechStreamingCallbacks {
  onBegin?: () => void | Promise<void>;
  onNext?: (audio: Float32Array) => void | Promise<void>;
  onEnd?: () => void | Promise<void>;
}
+
+/**
+ * Text to Speech streaming input definition
+ *
+ * Streaming mode in T2S is synchronized by passing specific callbacks
+ * executed at given moments of the streaming.
+ * Actions such as playing the audio should happen within the onNext callback.
+ * Callbacks can be both synchronous or asynchronous.
+ *
+ * @category Types
+ */
+export interface TextToSpeechStreamingInput
+ extends TextToSpeechInput, TextToSpeechStreamingCallbacks {}
+
+/**
+ * Streaming input definition for pre-computed phonemes.
+ * Same as `TextToSpeechStreamingInput` but accepts `phonemes` instead of `text`.
+ *
+ * @category Types
+ */
+export interface TextToSpeechStreamingPhonemeInput
+ extends TextToSpeechPhonemeInput, TextToSpeechStreamingCallbacks {}