microsoft · rui-ren · Mar 5, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 11, 2026
diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs
@@ -7,4 +7,5 @@
 using System.Runtime.CompilerServices;
 
 [assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
+[assembly: InternalsVisibleTo("AudioStreamTest")]
 [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -158,6 +158,31 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
                                                                       nint callbackPtr, // NativeCallbackFn pointer
                                                                       nint userData);
 
+    [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
+                                                                     ResponseBuffer* nativeResponse);
+
+    // --- Audio streaming P/Invoke imports (kept for future dedicated entry points) ---
+
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamStart(
+        RequestBuffer* request,
+        ResponseBuffer* response);
+
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamPush(
+        StreamingRequestBuffer* request,
+        ResponseBuffer* response);
+
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamStop(
+        RequestBuffer* request,
+        ResponseBuffer* response);
+
     // helper to capture exceptions in callbacks
     internal class CallbackHelper
     {
@@ -331,4 +356,94 @@ public Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreIn
         return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct);
     }
 
+    /// <summary>
+    /// Marshal a ResponseBuffer from unmanaged memory into a managed Response and free the unmanaged memory.
+    /// </summary>
+    private Response MarshalResponse(ResponseBuffer response)
+    {
+        Response result = new();
+
+        if (response.Data != IntPtr.Zero && response.DataLength > 0)
+        {
+            byte[] managedResponse = new byte[response.DataLength];
+            Marshal.Copy(response.Data, managedResponse, 0, response.DataLength);
+            result.Data = System.Text.Encoding.UTF8.GetString(managedResponse);
+        }
+
+        if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
+        {
+            result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
+        }
+
+        Marshal.FreeHGlobal(response.Data);
+        Marshal.FreeHGlobal(response.Error);
+
+        return result;
+    }
+
+    // --- Audio streaming managed implementations ---
+    // Route through the existing execute_command / execute_command_with_binary entry points.
+    // The Core handles audio_stream_start / audio_stream_stop as command cases in ExecuteCommandManaged,
+    // and audio_stream_push as a command case in ExecuteCommandWithBinaryManaged.
+
+    public Response StartAudioStream(CoreInteropRequest request)
+    {
+        return ExecuteCommand("audio_stream_start", request);
+    }
+
+    public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
+    {
+        try
+        {
+            var commandInputJson = request.ToJson();
+            byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_push");
+            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
+
+            IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
+            Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);
+
+            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+            // Pin the managed audio data so GC won't move it during the native call
+            using var audioHandle = audioData.Pin();
+
+            unsafe
+            {
+                var reqBuf = new StreamingRequestBuffer
+                {
+                    Command = commandPtr,
+                    CommandLength = commandBytes.Length,
+                    Data = inputPtr,
+                    DataLength = inputBytes.Length,
+                    BinaryData = (nint)audioHandle.Pointer,
+                    BinaryDataLength = audioData.Length
+                };
+
+                ResponseBuffer response = default;
+
+                try
+                {
+                    CoreExecuteCommandWithBinary(&reqBuf, &response);
+                }
+                finally
+                {
+                    Marshal.FreeHGlobal(commandPtr);
+                    Marshal.FreeHGlobal(inputPtr);
+                }
+
+                return MarshalResponse(response);
+            }
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_push", ex, _logger);
+        }
+    }
+
+    public Response StopAudioStream(CoreInteropRequest request)
+    {
+        return ExecuteCommand("audio_stream_stop", request);
+    }
+
 }
diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs
@@ -51,4 +51,21 @@ Task<Response> ExecuteCommandAsync(string commandName, CoreInteropRequest? comma
     Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreInteropRequest? commandInput,
                                                    CallbackFn callback,
                                                    CancellationToken? ct = null);
+
+    // --- Audio streaming session support ---
+
+    [StructLayout(LayoutKind.Sequential)]
+    protected unsafe struct StreamingRequestBuffer
+    {
+        public nint Command;
+        public int CommandLength;
+        public nint Data;          // JSON params
+        public int DataLength;
+        public nint BinaryData;    // raw PCM audio bytes
+        public int BinaryDataLength;
+    }
+
+    Response StartAudioStream(CoreInteropRequest request);
+    Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData);
+    Response StopAudioStream(CoreInteropRequest request);
 }
diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
@@ -33,6 +33,9 @@ namespace Microsoft.AI.Foundry.Local.Detail;
 [JsonSerializable(typeof(IList<FunctionDefinition>))]
 [JsonSerializable(typeof(PropertyDefinition))]
 [JsonSerializable(typeof(IList<PropertyDefinition>))]
+// --- NEW: Audio streaming types ---
+[JsonSerializable(typeof(LiveAudioTranscriptionResult))]
+[JsonSerializable(typeof(CoreErrorResponse))]
 [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
                              WriteIndented = false)]
 internal partial class JsonSerializationContext : JsonSerializerContext

diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -8,7 +8,6 @@ namespace Microsoft.AI.Foundry.Local;
 
 using System.Runtime.CompilerServices;
 using System.Threading.Channels;
-
 using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
 using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
 
@@ -46,6 +45,16 @@ public record AudioSettings
     /// </summary>
     public AudioSettings Settings { get; } = new();
 
+    /// <summary>
+    /// Create a real-time streaming transcription session.
+    /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
+    /// </summary>
+    /// <returns>A streaming session that must be disposed when done.</returns>
+    public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
+    {
+        return new LiveAudioTranscriptionSession(_modelId);
+    }
+
     /// <summary>
     /// Transcribe audio from a file.
     /// </summary>
@@ -63,28 +72,6 @@ public async Task<AudioCreateTranscriptionResponse> TranscribeAudioAsync(string
                                                     .ConfigureAwait(false);
     }
 
-    /// <summary>
-    /// Transcribe audio from a file with streamed output.
-    /// </summary>
-    /// <param name="audioFilePath">
-    /// Path to file containing audio recording.
-    /// Supported formats: mp3
-    /// </param>
-    /// <param name="ct">Cancellation token.</param>
-    /// <returns>An asynchronous enumerable of transcription responses.</returns>
-    public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
-        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
-    {
-        var enumerable = Utils.CallWithExceptionHandling(
-            () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
-            "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
-
-        await foreach (var item in enumerable)
-        {
-            yield return item;
-        }
-    }
-
     private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
                                                                                   CancellationToken? ct)
     {
@@ -108,6 +95,7 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
         return output;
     }
 
+
     private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
         string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
     {