Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dotnet/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@
<PackageVersion Include="Microsoft.Extensions.Logging.Console" Version="8.0.1" />
<PackageVersion Include="Microsoft.Extensions.Logging.Debug" Version="8.0.1" />
<PackageVersion Include="Microsoft.Extensions.Options.DataAnnotations" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.Options.ConfigurationExtensions" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.TimeProvider.Testing" Version="8.10.0" />
<PackageVersion Include="Microsoft.Extensions.FileProviders.Physical" Version="8.0.0" />
<PackageVersion Include="Microsoft.Extensions.FileProviders.Embedded" Version="8.0.11" />
Expand Down Expand Up @@ -228,5 +229,7 @@
<PackageVersion Include="Spectre.Console" Version="0.49.1" />
<PackageVersion Include="Spectre.Console.Cli" Version="0.49.1" />
<PackageVersion Include="Spectre.Console.Json" Version="0.49.1" />
<PackageVersion Include="NAudio" Version="2.2.1" />
<PackageVersion Include="WebRtcVadSharp" Version="1.3.2" />
</ItemGroup>
</Project>
1 change: 1 addition & 0 deletions dotnet/SK-dotnet.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
<Project Path="samples/Demos/TelemetryWithAppInsights/TelemetryWithAppInsights.csproj" />
<Project Path="samples/Demos/TimePlugin/TimePlugin.csproj" />
<Project Path="samples/Demos/VectorStoreRAG/VectorStoreRAG.csproj" />
<Project Path="samples/Demos/VoiceChat/VoiceChat.csproj" />
</Folder>
<Folder Name="/samples/Demos/A2AClientServer/">
<Project Path="samples/Demos/A2AClientServer/A2AClient/A2AClient.csproj" />
Expand Down
10 changes: 10 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Options/AudioOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright (c) Microsoft. All rights reserved.

/// <summary>
/// Fixed audio format values shared by the voice chat sample.
/// These are compile-time constants: they are not bound from appsettings
/// and are not meant to be tuned.
/// </summary>
public class AudioOptions
{
    /// <summary>Sample rate of the audio stream (samples per second).</summary>
    public const int SampleRate = 16_000;

    /// <summary>Number of audio channels.</summary>
    public const int Channels = 1;

    /// <summary>Bit depth of a single sample.</summary>
    public const int BitsPerSample = 16;

    /// <summary>Buffer length, in milliseconds.</summary>
    public const int BufferMilliseconds = 20;
}
17 changes: 17 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Options/ChatOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (c) Microsoft. All rights reserved.

using System.ComponentModel.DataAnnotations;

/// <summary>
/// Chat settings bound from the <c>Chat</c> configuration section.
/// All properties carry data annotations so that invalid values are rejected
/// when the options are validated instead of surfacing later as request errors.
/// </summary>
public class ChatOptions
{
    /// <summary>Name of the configuration section these options are bound from.</summary>
    public const string SectionName = "Chat";

    /// <summary>System prompt for the chat; must be provided (non-empty).</summary>
    [Required]
    public string SystemMessage { get; set; } = string.Empty;

    /// <summary>
    /// Size threshold used when chunking the streamed chat response.
    /// NOTE(review): presumably a character count consumed by the chat service — confirm.
    /// Negative values are rejected.
    /// </summary>
    [Range(0, int.MaxValue)]
    public int StreamingChunkSizeThreshold { get; set; } = 100;

    /// <summary>Sampling temperature; restricted to the 0–2 range accepted by the OpenAI API.</summary>
    [Range(0.0, 2.0)]
    public double Temperature { get; set; } = 0.7;

    /// <summary>Maximum number of tokens to generate; must be positive.</summary>
    [Range(1, int.MaxValue)]
    public int MaxTokens { get; set; } = 500;

    /// <summary>Nucleus sampling probability mass; restricted to the 0–1 range.</summary>
    [Range(0.0, 1.0)]
    public double TopP { get; set; } = 0.9;
}
20 changes: 20 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Options/OpenAIOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) Microsoft. All rights reserved.

using System.ComponentModel.DataAnnotations;

/// <summary>
/// OpenAI settings bound from the <c>OpenAI</c> configuration section.
/// Every property is required; the model identifiers ship with defaults,
/// the API key does not.
/// </summary>
public class OpenAIOptions
{
    /// <summary>Name of the configuration section these options are bound from.</summary>
    public const string SectionName = "OpenAI";

    /// <summary>API key used to authenticate against OpenAI. No default; must be configured.</summary>
    [Required]
    public string ApiKey { get; set; } = "";

    /// <summary>Identifier of the chat completion model.</summary>
    [Required]
    public string ChatModelId { get; set; } = "gpt-4";

    /// <summary>Identifier of the transcription model.</summary>
    [Required]
    public string TranscriptionModelId { get; set; } = "gpt-4o-transcribe";

    /// <summary>Identifier of the speech model.</summary>
    [Required]
    public string SpeechModelId { get; set; } = "gpt-4o-mini-tts";
}
50 changes: 50 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Pipeline/PipelineEvents.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (c) Microsoft. All rights reserved.

global using AudioChunkEvent = PipelineEvent<byte[]>;
global using AudioEvent = PipelineEvent<AudioData>;
global using ChatEvent = PipelineEvent<string>;
global using SpeechEvent = PipelineEvent<byte[]>;
global using TranscriptionEvent = PipelineEvent<string?>;

/// <summary>
/// Immutable envelope passed between pipeline blocks: a payload tagged with the
/// conversation turn it belongs to and the cancellation token of that turn.
/// </summary>
/// <typeparam name="T">Type of the carried payload.</typeparam>
/// <param name="turnId">Identifier of the turn that produced the event.</param>
/// <param name="cancellationToken">Token associated with the event.</param>
/// <param name="payload">Data carried by the event.</param>
public readonly struct PipelineEvent<T>(int turnId, CancellationToken cancellationToken, T payload) : IEquatable<PipelineEvent<T>>
{
    /// <summary>Identifier of the turn that produced the event.</summary>
    public int TurnId { get; } = turnId;

    /// <summary>Token associated with the event.</summary>
    public CancellationToken CancellationToken { get; } = cancellationToken;

    /// <summary>Data carried by the event.</summary>
    public T Payload { get; } = payload;

    /// <summary>
    /// Determines whether an event should still be processed.
    /// </summary>
    /// <param name="evt">Event to check.</param>
    /// <param name="currentTurnId">Turn identifier the event must match.</param>
    /// <param name="payloadPredicate">Optional extra check on the payload; treated as passing when omitted.</param>
    /// <returns>
    /// <see langword="true"/> when the payload is not null, the turn ids match, the event's token
    /// has not been cancelled, and the optional predicate accepts the payload.
    /// </returns>
    public static bool IsValid(PipelineEvent<T> evt, int currentTurnId, Func<T, bool>? payloadPredicate = null)
        => evt.Payload != null
        && evt.TurnId == currentTurnId
        && !evt.CancellationToken.IsCancellationRequested
        && (payloadPredicate?.Invoke(evt.Payload) ?? true);

    /// <inheritdoc/>
    public override bool Equals(object? obj) => obj is PipelineEvent<T> other && this.Equals(other);

    /// <inheritdoc/>
    public override int GetHashCode() => HashCode.Combine(this.TurnId, this.CancellationToken, this.Payload);

    /// <summary>Compares two events member by member.</summary>
    public static bool operator ==(PipelineEvent<T> left, PipelineEvent<T> right)
    {
        return left.Equals(right);
    }

    /// <summary>Negation of <c>==</c>.</summary>
    public static bool operator !=(PipelineEvent<T> left, PipelineEvent<T> right)
    {
        return !(left == right);
    }

    /// <summary>
    /// Member-wise equality. The payload is compared with the default equality comparer for
    /// <typeparamref name="T"/>, so array payloads such as <c>byte[]</c> compare by reference.
    /// </summary>
    public bool Equals(PipelineEvent<T> other)
        => this.TurnId == other.TurnId
        && this.CancellationToken.Equals(other.CancellationToken)
        && EqualityComparer<T>.Default.Equals(this.Payload, other.Payload);
}

/// <summary>
/// A buffer of raw audio bytes together with the format needed to interpret it.
/// </summary>
/// <param name="Data">Raw audio bytes.</param>
/// <param name="SampleRate">Samples per second.</param>
/// <param name="Channels">Number of channels.</param>
/// <param name="BitsPerSample">Bit depth of a single sample.</param>
public record AudioData(byte[] Data, int SampleRate, int Channels, int BitsPerSample)
{
    /// <summary>
    /// Playback length of <see cref="Data"/>: its byte count divided by the byte rate
    /// implied by the format (integer arithmetic for the rate).
    /// </summary>
    public TimeSpan Duration
    {
        get
        {
            int bytesPerSecond = this.SampleRate * this.Channels * this.BitsPerSample / 8;
            double seconds = this.Data.Length / (double)bytesPerSecond;
            return TimeSpan.FromSeconds(seconds);
        }
    }
}
23 changes: 23 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Pipeline/TurnManager.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft. All rights reserved.

/// <summary>
/// Tracks the current conversation turn. Each turn has an id and a cancellation token;
/// <see cref="Interrupt"/> cancels the current turn's token and starts a new turn.
/// </summary>
public class TurnManager : IDisposable
{
    private readonly object _lock = new();
    private int _currentTurnId;
    private CancellationTokenSource _cts = new();
    private bool _disposed;

    /// <summary>Identifier of the current turn; incremented by every <see cref="Interrupt"/>.</summary>
    public int CurrentTurnId { get { lock (this._lock) { return this._currentTurnId; } } }

    /// <summary>Cancellation token of the current turn.</summary>
    public CancellationToken CurrentToken { get { lock (this._lock) { return this._cts.Token; } } }

    /// <summary>
    /// Ends the current turn: advances the turn id, installs a fresh token source and
    /// cancels the token of the turn that was interrupted.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The manager has been disposed.</exception>
    public void Interrupt()
    {
        CancellationTokenSource interrupted;

        lock (this._lock)
        {
            if (this._disposed)
            {
                throw new ObjectDisposedException(nameof(TurnManager));
            }

            // Swap first so the manager always ends up with a live source,
            // even if a cancellation callback throws below.
            interrupted = this._cts;
            this._cts = new CancellationTokenSource();
            this._currentTurnId++;
        }

        // Cancel() runs registered callbacks synchronously. Doing it outside the lock keeps
        // callbacks (and threads they wait on) free to read CurrentTurnId / CurrentToken.
        try
        {
            interrupted.Cancel();
        }
        finally
        {
            interrupted.Dispose();
        }
    }

    /// <summary>Releases the current token source. Safe to call more than once.</summary>
    public void Dispose()
    {
        lock (this._lock)
        {
            if (this._disposed)
            {
                return;
            }

            this._disposed = true;
            this._cts.Dispose();
        }

        GC.SuppressFinalize(this);
    }
}
127 changes: 127 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Pipeline/VoiceChatPipeline.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright (c) Microsoft. All rights reserved.

using System.Threading.Tasks.Dataflow;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;

/// <summary>
/// Wires the voice chat stages into a TPL Dataflow pipeline
/// (VAD → speech-to-text → chat → text-to-speech → playback) and feeds it with
/// audio chunks from the audio source until cancelled.
/// </summary>
public class VoiceChatPipeline : IDisposable
{
    // Pipeline configuration constants
    private const int MaxDegreeOfParallelism = 1; // Number of parallel operations in dataflow blocks
    private const int BoundedCapacity = 5; // Maximum capacity for dataflow block buffers
    private const bool EnsureOrdered = true; // Ensure order preservation in pipeline

    // Execution options shared by every block of the pipeline.
    private readonly ExecutionDataflowBlockOptions _executionOptions = new()
    {
        MaxDegreeOfParallelism = MaxDegreeOfParallelism,
        BoundedCapacity = BoundedCapacity,
        EnsureOrdered = EnsureOrdered
    };

    // Completion (and faults) flow downstream through every link.
    private readonly DataflowLinkOptions _linkOptions = new() { PropagateCompletion = true };
    private readonly ILogger<VoiceChatPipeline> _logger;
    private readonly AudioPlaybackService _audioPlaybackService;
    private readonly SpeechToTextService _speechToTextService;
    private readonly TextToSpeechService _textToSpeechService;
    private readonly ChatService _chatService;
    private readonly TurnManager _turnManager;
    private readonly VadService _vadService;
    private readonly AudioSourceService _audioSourceService;

    private CancellationTokenSource? _cancellationTokenSource;

    /// <summary>
    /// Creates the pipeline from its stage services.
    /// </summary>
    /// <param name="logger">Logger for pipeline diagnostics.</param>
    /// <param name="audioPlaybackService">Final stage; consumes speech events.</param>
    /// <param name="speechToTextService">Turns audio events into transcription events.</param>
    /// <param name="textToSpeechService">Turns chat events into speech events.</param>
    /// <param name="chatService">Turns transcription events into chat events.</param>
    /// <param name="vadService">First stage; turns raw audio chunks into audio events.</param>
    /// <param name="audioSourceService">Produces the raw audio chunks fed into the pipeline.</param>
    /// <param name="turnManager">Supplies the current turn id used to drop stale events.</param>
    /// <param name="audioOptions">Currently unused; kept so the constructor signature stays unchanged.</param>
    public VoiceChatPipeline(
        ILogger<VoiceChatPipeline> logger,
        AudioPlaybackService audioPlaybackService,
        SpeechToTextService speechToTextService,
        TextToSpeechService textToSpeechService,
        ChatService chatService,
        VadService vadService,
        AudioSourceService audioSourceService,
        TurnManager turnManager,
        IOptions<AudioOptions> audioOptions)
    {
        this._logger = logger;
        this._audioPlaybackService = audioPlaybackService;
        this._speechToTextService = speechToTextService;
        this._textToSpeechService = textToSpeechService;
        this._chatService = chatService;
        this._vadService = vadService;
        this._audioSourceService = audioSourceService;
        this._turnManager = turnManager;
    }

    /// <summary>
    /// Builds the pipeline and pumps audio chunks into it until <paramref name="cancellationToken"/>
    /// is cancelled, the audio source ends, or the first block stops accepting input.
    /// Then completes the first block and waits for the playback block to finish.
    /// </summary>
    /// <param name="cancellationToken">Token that stops the pipeline.</param>
    public async Task RunAsync(CancellationToken cancellationToken = default)
    {
        // Release the linked source of a previous run, if any, before replacing it.
        this._cancellationTokenSource?.Dispose();
        this._cancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        var runToken = this._cancellationTokenSource.Token;

        // Create pipeline blocks - VAD accepts raw audio chunks directly
        var vadBlock = new TransformManyBlock<byte[], AudioEvent>(this._vadService.Transform, this._executionOptions);
        var sttBlock = new TransformBlock<AudioEvent, TranscriptionEvent>(this._speechToTextService.TransformAsync, this._executionOptions);
        var chatBlock = new TransformManyBlock<TranscriptionEvent, ChatEvent>(this._chatService.TransformAsync, this._executionOptions);
        var ttsBlock = new TransformBlock<ChatEvent, SpeechEvent>(this._textToSpeechService.TransformAsync, this._executionOptions);
        var playbackBlock = new ActionBlock<SpeechEvent>(this._audioPlaybackService.PipelineActionAsync, this._executionOptions);

        // Connect the blocks in the pipeline
        this.Link(vadBlock, sttBlock, "VAD", audioData => audioData.Data.Length > 0);
        this.Link(sttBlock, chatBlock, "STT", t => !string.IsNullOrEmpty(t));
        this.Link(chatBlock, ttsBlock, "Chat", t => !string.IsNullOrEmpty(t));
        this.Link(ttsBlock, playbackBlock, "TTS", t => t.Length > 0);

        this._logger.LogInformation("Voice Chat started. You can start conversation now, or press Ctrl+C to exit.");

        try
        {
            // Keep feeding audio chunks into the VAD block until the run is cancelled.
            await foreach (var audioChunk in this._audioSourceService.GetAudioChunksAsync(runToken))
            {
                // SendAsync returns false when the block permanently declines input
                // (completed or faulted); feeding it further would silently lose audio.
                if (!await vadBlock.SendAsync(audioChunk, runToken))
                {
                    this._logger.LogWarning("VAD block declined an audio chunk; stopping audio capture.");
                    break;
                }
            }
        }
        catch (OperationCanceledException)
        {
            this._logger.LogInformation("Voice Chat pipeline stopping due to cancellation...");
        }
        finally
        {
            vadBlock.Complete();
            await playbackBlock.Completion;
        }
    }

    /// <summary>Disposes the VAD service and the cancellation source of the last run.</summary>
    public void Dispose()
    {
        this._vadService?.Dispose();
        this._cancellationTokenSource?.Dispose();
    }

    // True when the event belongs to the current turn, is not cancelled and has a usable payload.
    private bool Filter<T>(PipelineEvent<T> evt, Func<T, bool> predicate)
        => PipelineEvent<T>.IsValid(evt, this._turnManager.CurrentTurnId, predicate);

    // Predicate of the catch-all link: accepts (and logs) only events the real target rejects.
    // It must not accept valid events: a source offers a message to its next link when the
    // bounded target merely postpones it, and an accept-everything sink would drop it.
    private bool FilterDiscarded<T>(PipelineEvent<T> evt, string blockName, Func<T, bool> predicate)
    {
        if (this.Filter(evt, predicate))
        {
            return false;
        }

        this._logger.LogWarning("{BlockName} block: Event filtered out due to cancellation or empty payload.", blockName);
        return true;
    }

    // Links source to target for valid events and routes everything else to a null target
    // so that rejected events do not block the source's output queue.
    private void Link<T>(
        ISourceBlock<PipelineEvent<T>> source,
        ITargetBlock<PipelineEvent<T>> target,
        string blockName,
        Func<T, bool> predicate)
    {
        source.LinkTo(target, this._linkOptions, evt => this.Filter(evt, predicate));
        this.DiscardFiltered(source, blockName, predicate);
    }

    // Attaches the catch-all null target that swallows rejected events.
    private void DiscardFiltered<T>(ISourceBlock<PipelineEvent<T>> block, string blockName, Func<T, bool> predicate)
        => block.LinkTo(
            DataflowBlock.NullTarget<PipelineEvent<T>>(),
            this._linkOptions,
            evt => this.FilterDiscarded(evt, blockName, predicate));
}
58 changes: 58 additions & 0 deletions dotnet/samples/Demos/VoiceChat/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.SemanticKernel;

/// <summary>
/// Entry point of the voice chat sample: configures options, Semantic Kernel and the
/// pipeline services, then runs the pipeline until Ctrl+C is pressed.
/// </summary>
internal static class Program
{
    /// <summary>Builds the host and runs the voice chat pipeline.</summary>
    /// <param name="args">Command-line arguments forwarded to the host builder.</param>
    /// <exception cref="InvalidOperationException">The OpenAI API key is not configured.</exception>
    internal static async Task Main(string[] args)
    {
        var builder = Host.CreateApplicationBuilder(args);

        // Adding configuration from appsettings.json and environment variables
        builder.Services.ConfigureOptions<OpenAIOptions>(OpenAIOptions.SectionName);
        builder.Services.ConfigureOptions<ChatOptions>(ChatOptions.SectionName);

        // The kernel is registered before the options can be resolved, so the values are read
        // straight from configuration. Fall back to the default declared on OpenAIOptions when
        // the model id is absent, instead of passing null.
        var chatModelId =
            builder.Configuration[$"{OpenAIOptions.SectionName}:{nameof(OpenAIOptions.ChatModelId)}"]
            ?? new OpenAIOptions().ChatModelId;

        var apiKeyPath = $"{OpenAIOptions.SectionName}:{nameof(OpenAIOptions.ApiKey)}";
        var apiKey = builder.Configuration[apiKeyPath];
        if (string.IsNullOrWhiteSpace(apiKey))
        {
            throw new InvalidOperationException(
                $"Configuration value '{apiKeyPath}' is missing. Set it in appsettings.json, user secrets or an environment variable.");
        }

        // Configure Semantic Kernel in DI container
        builder.Services
            .AddKernel()
            .AddOpenAIChatCompletion(
                modelId: chatModelId,
                apiKey: apiKey
            );

        // Register audio chat pipeline services
        builder.Services.AddSingleton<AudioPlaybackService>();
        builder.Services.AddSingleton<SpeechToTextService>();
        builder.Services.AddSingleton<TextToSpeechService>();
        builder.Services.AddSingleton<ChatService>();
        builder.Services.AddSingleton<TurnManager>();
        builder.Services.AddSingleton<VadService>();
        builder.Services.AddSingleton<AudioSourceService>();

        // Register audio chat pipeline
        builder.Services.AddTransient<VoiceChatPipeline>();

        using var host = builder.Build();

        // Setting up graceful shutdown on Ctrl+C
        using var cts = new CancellationTokenSource();
        Console.CancelKeyPress += (_, e) =>
        {
            // Keep the process alive so the pipeline can drain; cancellation stops it.
            e.Cancel = true;
            cts.Cancel();
        };

        // Run the voice chat pipeline
        using var pipeline = host.Services.GetRequiredService<VoiceChatPipeline>();
        await pipeline.RunAsync(cts.Token);
    }

    /// <summary>
    /// Registers <typeparamref name="TOptions"/> bound to the given configuration section,
    /// with data-annotation validation.
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="sectionName">Configuration section to bind from.</param>
    private static void ConfigureOptions<TOptions>(this IServiceCollection services, string sectionName) where TOptions : class =>
        services
            .AddOptions<TOptions>()
            .BindConfiguration(sectionName)
            .ValidateDataAnnotations()
            .ValidateOnStart();
}
Loading
Loading