-
Notifications
You must be signed in to change notification settings - Fork 832
M.E.AI.Abstractions - Speech to Text Abstraction #5838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
stephentoub
merged 32 commits into
dotnet:main
from
rogerbarreto:audio-transcription-abstraction
Apr 2, 2025
Merged
Changes from all commits
Commits
Show all changes
32 commits
Select commit
Hold shift + click to select a range
db6b7e3
Initial speech to text abstractions
rogerbarreto 4bdb7b9
Address some feedback (still more things to address)
stephentoub 14c37c2
Merge branch 'main' of https://github.com/rogerbarreto/extensions int…
rogerbarreto fad8017
Resolve conflict
rogerbarreto 3448daa
Ensure UT are working before further changes
rogerbarreto ef93211
Update method names Transcribe / Response to GetText
rogerbarreto 43d610c
Update Test Names to new Method names
rogerbarreto ff4ae4a
Change interface from IList<IAsyncEnumerable> to one stream item at a…
rogerbarreto 0831000
Update XmlDocs with corret definition, ensure correct naming
rogerbarreto 8c893a9
Dropping the Choice / Message concept, flattering the Message with th…
rogerbarreto 3d91982
Remove CultureInfo complexity from language properties
rogerbarreto 009eeca
Adding Prompt property to options + UT
rogerbarreto 305e7e4
Revert global.json changes
rogerbarreto 1feac6d
Add missing experimental
rogerbarreto 956097d
Fix UT
rogerbarreto 0830a51
Address PR comments
rogerbarreto 72407f2
Fix unit tests
rogerbarreto 3c7e4ae
Fix UT
rogerbarreto 8763c8c
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto 8d473cb
Merge branch 'audio-transcription-abstraction' of https://github.com/…
rogerbarreto c6c016e
Address PR comments
rogerbarreto b3d7819
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto ca1338b
Remove async wrapping
rogerbarreto d3a14c9
Adjusting concat / text fields
rogerbarreto 263f0e0
Start time and end time added to update + UT covering
rogerbarreto dd5ec14
AsISpeechToText renaming
rogerbarreto 9eabb98
Remove OpenAIClient ctor + small fixes
rogerbarreto 78e4ebb
Removing rawrepresentation impl from Update -> Response
rogerbarreto 46acd1c
Merge branch 'main' into audio-transcription-abstraction
rogerbarreto 8bf3389
Add missing AsISpeechToText UT
rogerbarreto c5c6e89
Add GetService UT
rogerbarreto 977a0e5
Warning fix
rogerbarreto File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Binary file not shown.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/ErrorContent.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Diagnostics; | ||
using System.Text.Json.Serialization; | ||
using Microsoft.Shared.Diagnostics; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
/// <summary>Represents an error.</summary> | ||
/// <remarks> | ||
/// Typically, <see cref="ErrorContent"/> is used for non-fatal errors, where something went wrong | ||
/// as part of the operation but the operation was still able to continue. | ||
/// </remarks> | ||
[DebuggerDisplay("{DebuggerDisplay,nq}")] | ||
public class ErrorContent : AIContent | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
/// <summary>The error message.</summary> | ||
private string _message; | ||
|
||
/// <summary>Initializes a new instance of the <see cref="ErrorContent"/> class with the specified message.</summary> | ||
/// <param name="message">The message to store in this content.</param> | ||
/// <remarks>The supplied message is passed through <c>Throw.IfNull</c> before it is stored.</remarks> | ||
[JsonConstructor] | ||
public ErrorContent(string message) | ||
{ | ||
_message = Throw.IfNull(message); | ||
} | ||
|
||
/// <summary>Gets or sets the error message.</summary> | ||
/// <remarks>Assigned values are passed through <c>Throw.IfNull</c> before they are stored.</remarks> | ||
public string Message | ||
{ | ||
get => _message; | ||
set => _message = Throw.IfNull(value); | ||
} | ||
|
||
/// <summary>Gets or sets the error code.</summary> | ||
public string? ErrorCode { get; set; } | ||
|
||
/// <summary>Gets or sets the error details.</summary> | ||
public string? Details { get; set; } | ||
|
||
/// <summary>Gets a string representing this instance to display in the debugger.</summary> | ||
/// <remarks> | ||
/// The text is <c>Error = </c> followed by <see cref="Message"/>. <see cref="ErrorCode"/> is then appended in | ||
/// parentheses and <see cref="Details"/> after a dash, each only when it is not <see langword="null"/>. | ||
/// </remarks> | ||
[DebuggerBrowsable(DebuggerBrowsableState.Never)] | ||
private string DebuggerDisplay => | ||
$"Error = {Message}" + | ||
(ErrorCode is not null ? $" ({ErrorCode})" : string.Empty) + | ||
(Details is not null ? $" - {Details}" : string.Empty); | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
...braries/Microsoft.Extensions.AI.Abstractions/SpeechToText/DelegatingSpeechToTextClient.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.IO; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Microsoft.Shared.Diagnostics; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
/// <summary> | ||
/// Provides an optional base class for an <see cref="ISpeechToTextClient"/> that passes through calls to another instance. | ||
/// </summary> | ||
/// <remarks> | ||
/// This is recommended as a base type when building clients that can be chained in any order around an underlying <see cref="ISpeechToTextClient"/>. | ||
/// The default implementation simply passes each call to the inner client instance. | ||
/// </remarks> | ||
[Experimental("MEAI001")] | ||
public class DelegatingSpeechToTextClient : ISpeechToTextClient | ||
{ | ||
/// <summary> | ||
/// Initializes a new instance of the <see cref="DelegatingSpeechToTextClient"/> class. | ||
/// </summary> | ||
/// <param name="innerClient">The wrapped client instance.</param> | ||
/// <remarks>The supplied client is passed through <c>Throw.IfNull</c> before it is stored in <see cref="InnerClient"/>.</remarks> | ||
protected DelegatingSpeechToTextClient(ISpeechToTextClient innerClient) | ||
{ | ||
InnerClient = Throw.IfNull(innerClient); | ||
} | ||
|
||
/// <inheritdoc /> | ||
public void Dispose() | ||
{ | ||
// Route through the protected virtual overload so that derived types can take part in disposal. | ||
Dispose(disposing: true); | ||
GC.SuppressFinalize(this); | ||
} | ||
|
||
/// <summary>Gets the inner <see cref="ISpeechToTextClient" />.</summary> | ||
protected ISpeechToTextClient InnerClient { get; } | ||
|
||
/// <inheritdoc /> | ||
public virtual Task<SpeechToTextResponse> GetTextAsync( | ||
Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) | ||
{ | ||
// Forward all arguments unchanged to the wrapped client. | ||
return InnerClient.GetTextAsync(audioSpeechStream, options, cancellationToken); | ||
} | ||
|
||
/// <inheritdoc /> | ||
public virtual IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync( | ||
Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) | ||
{ | ||
// Forward all arguments unchanged to the wrapped client. | ||
return InnerClient.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken); | ||
} | ||
|
||
/// <inheritdoc /> | ||
public virtual object? GetService(Type serviceType, object? serviceKey = null) | ||
{ | ||
_ = Throw.IfNull(serviceType); | ||
|
||
// This instance answers the request itself only when no key was given and it is an instance of the requested type. | ||
// If the key is non-null, we don't know what it means so pass through to the inner service. | ||
return | ||
serviceKey is null && serviceType.IsInstanceOfType(this) ? this : | ||
InnerClient.GetService(serviceType, serviceKey); | ||
} | ||
|
||
/// <summary>Provides a mechanism for releasing unmanaged resources.</summary> | ||
/// <param name="disposing"><see langword="true"/> if being called from <see cref="Dispose()"/>; otherwise, <see langword="false"/>.</param> | ||
/// <remarks>When <paramref name="disposing"/> is <see langword="true"/>, the inner client is disposed as well.</remarks> | ||
protected virtual void Dispose(bool disposing) | ||
{ | ||
if (disposing) | ||
{ | ||
InnerClient.Dispose(); | ||
} | ||
} | ||
} |
61 changes: 61 additions & 0 deletions
61
src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/ISpeechToTextClient.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.IO; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
/// <summary>Represents a speech to text client.</summary> | ||
/// <remarks> | ||
/// <para> | ||
/// Unless otherwise specified, all members of <see cref="ISpeechToTextClient"/> are thread-safe for concurrent use. | ||
/// It is expected that all implementations of <see cref="ISpeechToTextClient"/> support being used by multiple requests concurrently. | ||
/// </para> | ||
/// <para> | ||
/// However, implementations of <see cref="ISpeechToTextClient"/> might mutate the arguments supplied to <see cref="GetTextAsync"/> and | ||
/// <see cref="GetStreamingTextAsync"/>, such as by configuring the options instance. Thus, consumers of the interface either should avoid | ||
/// using shared instances of these arguments for concurrent invocations or should otherwise ensure by construction that no | ||
/// <see cref="ISpeechToTextClient"/> instances are used which might employ such mutation. For example, the ConfigureOptions method might be | ||
/// provided with a callback that could mutate the supplied options argument, and that should be avoided if using a singleton options instance. | ||
/// The audio speech stream passed to these methods will not be closed or disposed by the implementation. | ||
/// </para> | ||
/// </remarks> | ||
[Experimental("MEAI001")] | ||
public interface ISpeechToTextClient : IDisposable | ||
{ | ||
/// <summary>Sends audio speech content to the model and returns the generated text.</summary> | ||
/// <param name="audioSpeechStream">The audio speech stream to send.</param> | ||
/// <param name="options">The speech to text options to configure the request.</param> | ||
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param> | ||
/// <returns>The text generated.</returns> | ||
Task<SpeechToTextResponse> GetTextAsync( | ||
Stream audioSpeechStream, | ||
SpeechToTextOptions? options = null, | ||
CancellationToken cancellationToken = default); | ||
|
||
/// <summary>Sends audio speech content to the model and streams back the generated text.</summary> | ||
/// <param name="audioSpeechStream">The audio speech stream to send.</param> | ||
/// <param name="options">The speech to text options to configure the request.</param> | ||
/// <param name="cancellationToken">The <see cref="CancellationToken"/> to monitor for cancellation requests. The default is <see cref="CancellationToken.None"/>.</param> | ||
/// <returns>The text updates representing the streamed output.</returns> | ||
IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync( | ||
Stream audioSpeechStream, | ||
SpeechToTextOptions? options = null, | ||
CancellationToken cancellationToken = default); | ||
|
||
/// <summary>Asks the <see cref="ISpeechToTextClient"/> for an object of the specified type <paramref name="serviceType"/>.</summary> | ||
/// <param name="serviceType">The type of object being requested.</param> | ||
/// <param name="serviceKey">An optional key that can be used to help identify the target service.</param> | ||
/// <returns>The found object, otherwise <see langword="null"/>.</returns> | ||
/// <exception cref="ArgumentNullException"><paramref name="serviceType"/> is <see langword="null"/>.</exception> | ||
/// <remarks> | ||
/// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the <see cref="ISpeechToTextClient"/>, | ||
/// including itself or any services it might be wrapping. | ||
/// </remarks> | ||
object? GetService(Type serviceType, object? serviceKey = null); | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.