Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk_v2/cs/src/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
using System.Runtime.CompilerServices;

[assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
[assembly: InternalsVisibleTo("AudioStreamTest")]
[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
115 changes: 115 additions & 0 deletions sdk_v2/cs/src/Detail/CoreInterop.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,31 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
nint callbackPtr, // NativeCallbackFn pointer
nint userData);

[LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
[UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
private static unsafe partial void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
ResponseBuffer* nativeResponse);

// --- Audio streaming P/Invoke imports (kept for future dedicated entry points) ---

[LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
[UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
private static unsafe partial void CoreAudioStreamStart(
RequestBuffer* request,
ResponseBuffer* response);

[LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
[UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
private static unsafe partial void CoreAudioStreamPush(
StreamingRequestBuffer* request,
ResponseBuffer* response);

[LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
[UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
private static unsafe partial void CoreAudioStreamStop(
RequestBuffer* request,
ResponseBuffer* response);

// helper to capture exceptions in callbacks
internal class CallbackHelper
{
Expand Down Expand Up @@ -331,4 +356,94 @@ public Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreIn
return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct);
}

/// <summary>
/// Marshal a ResponseBuffer from unmanaged memory into a managed Response and free the unmanaged memory.
/// </summary>
private Response MarshalResponse(ResponseBuffer response)
{
Response result = new();

if (response.Data != IntPtr.Zero && response.DataLength > 0)
{
byte[] managedResponse = new byte[response.DataLength];
Marshal.Copy(response.Data, managedResponse, 0, response.DataLength);
result.Data = System.Text.Encoding.UTF8.GetString(managedResponse);
}

if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
{
result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
}

Marshal.FreeHGlobal(response.Data);
Marshal.FreeHGlobal(response.Error);

return result;
}

// --- Audio streaming managed implementations ---
// Route through the existing execute_command / execute_command_with_binary entry points.
// The Core handles audio_stream_start / audio_stream_stop as command cases in ExecuteCommandManaged,
// and audio_stream_push as a command case in ExecuteCommandWithBinaryManaged.

public Response StartAudioStream(CoreInteropRequest request)
{
return ExecuteCommand("audio_stream_start", request);
}

public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
{
try
{
var commandInputJson = request.ToJson();
byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_push");
byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);

IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);

IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);

// Pin the managed audio data so GC won't move it during the native call
using var audioHandle = audioData.Pin();

unsafe
{
var reqBuf = new StreamingRequestBuffer
{
Command = commandPtr,
CommandLength = commandBytes.Length,
Data = inputPtr,
DataLength = inputBytes.Length,
BinaryData = (nint)audioHandle.Pointer,
BinaryDataLength = audioData.Length
};

ResponseBuffer response = default;

try
{
CoreExecuteCommandWithBinary(&reqBuf, &response);
}
finally
{
Marshal.FreeHGlobal(commandPtr);
Marshal.FreeHGlobal(inputPtr);
}

return MarshalResponse(response);
}
}
catch (Exception ex) when (ex is not OperationCanceledException)
{
throw new FoundryLocalException("Error executing audio_stream_push", ex, _logger);
}
}

public Response StopAudioStream(CoreInteropRequest request)
{
return ExecuteCommand("audio_stream_stop", request);
}

}
17 changes: 17 additions & 0 deletions sdk_v2/cs/src/Detail/ICoreInterop.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,21 @@ Task<Response> ExecuteCommandAsync(string commandName, CoreInteropRequest? comma
Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreInteropRequest? commandInput,
CallbackFn callback,
CancellationToken? ct = null);

// --- Audio streaming session support ---

[StructLayout(LayoutKind.Sequential)]
protected unsafe struct StreamingRequestBuffer
{
public nint Command;
public int CommandLength;
public nint Data; // JSON params
public int DataLength;
public nint BinaryData; // raw PCM audio bytes
public int BinaryDataLength;
}

Response StartAudioStream(CoreInteropRequest request);
Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData);
Response StopAudioStream(CoreInteropRequest request);
}
3 changes: 3 additions & 0 deletions sdk_v2/cs/src/Detail/JsonSerializationContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ namespace Microsoft.AI.Foundry.Local.Detail;
[JsonSerializable(typeof(IList<FunctionDefinition>))]
[JsonSerializable(typeof(PropertyDefinition))]
[JsonSerializable(typeof(IList<PropertyDefinition>))]
// --- NEW: Audio streaming types ---
[JsonSerializable(typeof(LiveAudioTranscriptionResult))]
[JsonSerializable(typeof(CoreErrorResponse))]
[JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
WriteIndented = false)]
internal partial class JsonSerializationContext : JsonSerializerContext
Expand Down
34 changes: 11 additions & 23 deletions sdk_v2/cs/src/OpenAI/AudioClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ namespace Microsoft.AI.Foundry.Local;

using System.Runtime.CompilerServices;
using System.Threading.Channels;

using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;

Expand Down Expand Up @@ -46,6 +45,16 @@ public record AudioSettings
/// </summary>
public AudioSettings Settings { get; } = new();

/// <summary>
/// Create a real-time streaming transcription session.
/// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
/// </summary>
/// <returns>A streaming session that must be disposed when done.</returns>
public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
{
return new LiveAudioTranscriptionSession(_modelId);
}

/// <summary>
/// Transcribe audio from a file.
/// </summary>
Expand All @@ -63,28 +72,6 @@ public async Task<AudioCreateTranscriptionResponse> TranscribeAudioAsync(string
.ConfigureAwait(false);
}

/// <summary>
/// Transcribe audio from a file with streamed output.
/// </summary>
/// <param name="audioFilePath">
/// Path to file containing audio recording.
/// Supported formats: mp3
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <returns>An asynchronous enumerable of transcription responses.</returns>
public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
{
var enumerable = Utils.CallWithExceptionHandling(
() => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
"Error during streaming audio transcription.", _logger).ConfigureAwait(false);

await foreach (var item in enumerable)
{
yield return item;
}
}

private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
CancellationToken? ct)
{
Expand All @@ -108,6 +95,7 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
return output;
}


private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
{
Expand Down
Loading
Loading