forked from microsoft/BotBuilder-Samples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
28 changed files
with
1,405 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -253,3 +253,6 @@ paket-files/ | |
|
||
# Node | ||
/**/node_modules | ||
|
||
# Visual Studio Code | ||
.vscode/ |
33 changes: 33 additions & 0 deletions
33
CSharp/intelligence-SpeechToText/App_Start/WebApiConfig.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
namespace SpeechToText
{
    using System.Web.Http;
    using Newtonsoft.Json;
    using Newtonsoft.Json.Serialization;

    /// <summary>
    /// Start-up configuration for Web API: JSON serialization settings and HTTP routes.
    /// </summary>
    public static class WebApiConfig
    {
        /// <summary>
        /// Applies the JSON settings to the Web API formatter and to <see cref="JsonConvert.DefaultSettings"/>,
        /// then registers attribute routes and the "DefaultApi" route.
        /// </summary>
        /// <param name="config">The HTTP configuration to set up.</param>
        public static void Register(HttpConfiguration config)
        {
            // Json settings used by the Web API JSON formatter.
            var formatterSettings = config.Formatters.JsonFormatter.SerializerSettings;
            formatterSettings.NullValueHandling = NullValueHandling.Ignore;
            formatterSettings.ContractResolver = new CamelCasePropertyNamesContractResolver();
            formatterSettings.Formatting = Formatting.Indented;

            // Same conventions for direct JsonConvert usage; a new settings object is built on every request for defaults.
            JsonConvert.DefaultSettings = CreateDefaultJsonSettings;

            // Web API configuration and services

            // Web API routes
            config.MapHttpAttributeRoutes();

            config.Routes.MapHttpRoute(
                name: "DefaultApi",
                routeTemplate: "api/{controller}/{id}",
                defaults: new { id = RouteParameter.Optional });
        }

        /// <summary>
        /// Builds the serializer settings returned by <see cref="JsonConvert.DefaultSettings"/>.
        /// </summary>
        private static JsonSerializerSettings CreateDefaultJsonSettings()
        {
            return new JsonSerializerSettings
            {
                ContractResolver = new CamelCasePropertyNamesContractResolver(),
                Formatting = Formatting.Indented,
                NullValueHandling = NullValueHandling.Ignore,
            };
        }
    }
}
182 changes: 182 additions & 0 deletions
182
CSharp/intelligence-SpeechToText/Controllers/MessagesController.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
namespace SpeechToText.Controllers | ||
{ | ||
using System; | ||
using System.Diagnostics; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Net; | ||
using System.Net.Http; | ||
using System.Net.Http.Headers; | ||
using System.Threading.Tasks; | ||
using System.Web.Http; | ||
using Microsoft.Bot.Connector; | ||
using Services; | ||
|
||
[BotAuthentication] | ||
public class MessagesController : ApiController | ||
{ | ||
private readonly MicrosoftCognitiveSpeechService speechService = new MicrosoftCognitiveSpeechService(); | ||
|
||
/// <summary>
/// POST: api/Messages
/// Receives an activity and replies to it. For message activities the first attachment whose
/// content type is "audio/wav" or "application/octet-stream" is downloaded, transcribed and
/// summarized; every other activity type is passed to <see cref="HandleSystemMessage"/>.
/// </summary>
/// <param name="activity">The incoming activity, bound from the request body.</param>
/// <returns>
/// 400 (Bad Request) when no activity could be bound from the body; otherwise 200 (OK),
/// including when transcription failed (the failure is traced and reported to the user in the reply).
/// </returns>
public async Task<HttpResponseMessage> Post([FromBody]Activity activity)
{
    // A missing or unparsable body binds to null; reject it instead of failing with a NullReferenceException.
    if (activity == null)
    {
        return this.Request.CreateResponse(HttpStatusCode.BadRequest);
    }

    if (activity.Type == ActivityTypes.Message)
    {
        var connector = new ConnectorClient(new Uri(activity.ServiceUrl));
        string message;

        try
        {
            // string.Equals tolerates attachments without a content type; media type names are compared case-insensitively.
            var audioAttachment = activity.Attachments?.FirstOrDefault(a =>
                a != null &&
                (string.Equals(a.ContentType, "audio/wav", StringComparison.OrdinalIgnoreCase) ||
                 string.Equals(a.ContentType, "application/octet-stream", StringComparison.OrdinalIgnoreCase)));

            if (audioAttachment != null)
            {
                // Despite its name, GetImageStream downloads the attachment whatever its content type.
                // The stream is disposed here so the download does not stay open after transcription.
                using (var stream = await GetImageStream(connector, audioAttachment))
                {
                    var text = await this.speechService.GetTextFromAudioAsync(stream);
                    message = ProcessText(activity.Text, text);
                }
            }
            else
            {
                message = "Did you upload an audio file? I'm more of an audible person. Try sending me a wav file";
            }
        }
        catch (Exception e)
        {
            // Deliberate best effort: the user gets a friendly reply and the details go to the trace log.
            message = "Oops! Something went wrong. Try again later.";

            Trace.TraceError(e.ToString());
        }

        Activity reply = activity.CreateReply(message);
        await connector.Conversations.ReplyToActivityAsync(reply);
    }
    else
    {
        await this.HandleSystemMessage(activity);
    }

    var response = this.Request.CreateResponse(HttpStatusCode.OK);
    return response;
}
|
||
/// <summary>
/// Builds the reply for a transcription and, when a command is supplied, appends the requested statistic.
/// </summary>
/// <param name="input">
/// Optional command typed by the user (surrounding whitespace is ignored, matching is case-insensitive):
/// WORD, CHARACTER, SPACE or VOWEL; any other non-empty value is treated as a keyword to count.
/// </param>
/// <param name="text">The transcribed text; null is treated as an empty transcription.</param>
/// <returns>"You said : {text}." followed by the statistic, if a command was given.</returns>
private static string ProcessText(string input, string text)
{
    // A null transcription used to throw in every command branch; treat it as empty instead.
    text = text ?? string.Empty;

    string message = "You said : " + text + ".";

    input = input?.Trim();

    if (string.IsNullOrEmpty(input))
    {
        return message;
    }

    // Ordinal, culture-independent comparisons: the current culture's casing rules
    // (e.g. the Turkish dotted/dotless i) must not change how commands or vowels are matched.
    if (input.Equals("WORD", StringComparison.OrdinalIgnoreCase))
    {
        var wordCount = text.Split(' ').Count(x => !string.IsNullOrEmpty(x));
        message += " Word Count: " + wordCount;
    }
    else if (input.Equals("CHARACTER", StringComparison.OrdinalIgnoreCase))
    {
        var characterCount = text.Count(c => c != ' ');
        message += " Character Count: " + characterCount;
    }
    else if (input.Equals("SPACE", StringComparison.OrdinalIgnoreCase))
    {
        var spaceCount = text.Count(c => c == ' ');
        message += " Space Count: " + spaceCount;
    }
    else if (input.Equals("VOWEL", StringComparison.OrdinalIgnoreCase))
    {
        var vowelCount = text.ToUpperInvariant().Count(c => "AEIOU".IndexOf(c) >= 0);
        message += " Vowel Count: " + vowelCount;
    }
    else
    {
        // Words are the space-separated tokens of the transcription; punctuation is not stripped.
        var keywordCount = text.Split(' ').Count(w => string.Equals(w, input, StringComparison.OrdinalIgnoreCase));
        message += " Keyword " + input + " found " + keywordCount + " times.";
    }

    return message;
}
|
||
/// <summary>
/// Handles non-message (system) activities. Only conversation updates trigger an action:
/// when the bot itself is among the added members, a greeting is sent.
/// </summary>
/// <param name="activity">The activity.</param>
/// <returns>Always null; no activity is produced for the caller.</returns>
private async Task<Activity> HandleSystemMessage(Activity activity)
{
    switch (activity.Type)
    {
        case ActivityTypes.DeleteUserData:
            // Implement user deletion here
            // If we handle user deletion, return a real message
            break;
        case ActivityTypes.ConversationUpdate:
            // Greet the user the first time the bot is added to a conversation.
            // MembersAdded may be null (e.g. an update that only removes members), so it is checked
            // before querying; this method runs outside Post's try/catch and must not throw here.
            if (activity.MembersAdded != null && activity.MembersAdded.Any(m => m.Id == activity.Recipient.Id))
            {
                var connector = new ConnectorClient(new Uri(activity.ServiceUrl));

                var response = activity.CreateReply();
                response.Text = "Hi! I am SpeechToText Bot. I can understand the content of any audio and convert it to text. Try sending me a wav file.";

                await connector.Conversations.ReplyToActivityAsync(response);
            }

            break;
        case ActivityTypes.ContactRelationUpdate:
            // Handle add/remove from contact lists
            break;
        case ActivityTypes.Typing:
            // Handle knowing that the user is typing
            break;
        case ActivityTypes.Ping:
            break;
    }

    return null;
}
|
||
/// <summary>
/// Downloads an attachment and returns its content as an in-memory stream.
/// The method name is historical; nothing here is specific to images.
/// </summary>
/// <param name="connector">Connector whose credentials supply the bearer token for Skype-hosted attachments.</param>
/// <param name="imageAttachment">The attachment to download; its ContentUrl must be an absolute URI.</param>
/// <returns>A read-only, seekable stream over the downloaded bytes.</returns>
private static async Task<Stream> GetImageStream(ConnectorClient connector, Attachment imageAttachment)
{
    using (var httpClient = new HttpClient())
    {
        // The Skype attachment URLs are secured by JwtToken,
        // you should set the JwtToken of your bot as the authorization header for the GET request your bot initiates to fetch the image.
        // https://github.com/Microsoft/BotBuilder/issues/662
        var uri = new Uri(imageAttachment.ContentUrl);

        // Match skype.com itself or a real subdomain only. A bare EndsWith("skype.com") would also
        // accept hosts such as "evilskype.com" and hand the bot's token to them.
        var isSkypeHost = uri.Host.Equals("skype.com", StringComparison.OrdinalIgnoreCase)
            || uri.Host.EndsWith(".skype.com", StringComparison.OrdinalIgnoreCase);

        if (isSkypeHost && uri.Scheme == Uri.UriSchemeHttps)
        {
            httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", await GetTokenAsync(connector));
            httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/octet-stream"));
        }
        else
        {
            httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue(imageAttachment.ContentType));
        }

        // Read the whole payload while the client is still alive; handing out the live network
        // stream would leave the caller reading from a client that has already been disposed.
        var content = await httpClient.GetByteArrayAsync(uri);
        return new MemoryStream(content, writable: false);
    }
}
|
||
/// <summary>
/// Retrieves the bot's token from the connector's credentials.
/// </summary>
/// <param name="connector">The connector whose credentials are queried.</param>
/// <returns>
/// The token obtained from the credentials when they are <c>MicrosoftAppCredentials</c>; otherwise null.
/// </returns>
private static async Task<string> GetTokenAsync(ConnectorClient connector)
{
    var appCredentials = connector.Credentials as MicrosoftAppCredentials;

    // Any other kind of credentials (or none) cannot provide a token.
    if (appCredentials == null)
    {
        return null;
    }

    var token = await appCredentials.GetTokenAsync();
    return token;
}
|
||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
<%@ Application Codebehind="Global.asax.cs" Inherits="SpeechToText.WebApiApplication" Language="C#" %> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
namespace SpeechToText
{
    using System.Web;
    using System.Web.Http;

    /// <summary>
    /// Application class referenced by Global.asax; configures Web API on start-up.
    /// </summary>
    public class WebApiApplication : HttpApplication
    {
        /// <summary>
        /// Hands the global Web API configuration to <see cref="WebApiConfig.Register"/>.
        /// </summary>
        protected void Application_Start()
        {
            GlobalConfiguration.Configure(config => WebApiConfig.Register(config));
        }
    }
}
34 changes: 34 additions & 0 deletions
34
CSharp/intelligence-SpeechToText/Properties/AssemblyInfo.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
using System.Reflection; | ||
using System.Runtime.InteropServices; | ||
|
||
// General Information about an assembly is controlled through the following | ||
// set of attributes. Change these attribute values to modify the information | ||
// associated with an assembly. | ||
[assembly: AssemblyTitle("SpeechToText")] | ||
[assembly: AssemblyDescription("")] | ||
[assembly: AssemblyConfiguration("")] | ||
[assembly: AssemblyCompany("")] | ||
[assembly: AssemblyProduct("SpeechToText")] | ||
[assembly: AssemblyCopyright("Copyright © 2016")] | ||
[assembly: AssemblyTrademark("")] | ||
[assembly: AssemblyCulture("")] | ||
|
||
// Setting ComVisible to false makes the types in this assembly not visible | ||
// to COM components. If you need to access a type in this assembly from | ||
// COM, set the ComVisible attribute to true on that type. | ||
[assembly: ComVisible(false)] | ||
|
||
// The following GUID is for the ID of the typelib if this project is exposed to COM | ||
[assembly: Guid("a8ba1066-5695-4d71-abb4-65e5a5e0c3d4")] | ||
|
||
// Version information for an assembly consists of the following four values: | ||
// | ||
// Major Version | ||
// Minor Version | ||
// Build Number | ||
// Revision | ||
// | ||
// You can specify all the values or you can default the Build and Revision Numbers | ||
// by using the '*' wildcard, for example AssemblyVersion("1.0.*"): | ||
[assembly: AssemblyVersion("1.0.0.0")] | ||
[assembly: AssemblyFileVersion("1.0.0.0")] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Speech To Text Bot Sample | ||
|
||
A sample bot that illustrates how to use the Microsoft Cognitive Services Bing Speech API to analyze an audio file and return the text. | ||
|
||
[![Deploy to Azure](http://azuredeploy.net/deploybutton.png)](https://azuredeploy.net) | ||
|
||
### Prerequisites | ||
|
||
The minimum prerequisites to run this sample are: | ||
* The latest update of Visual Studio 2015. You can download the community version [here](http://www.visualstudio.com) for free. | ||
* The Bot Framework Emulator. To install the Bot Framework Emulator, download it from [here](https://aka.ms/bf-bc-emulator). Please refer to [this documentation article](https://docs.botframework.com/en-us/csharp/builder/sdkreference/gettingstarted.html#emulator) to know more about the Bot Framework Emulator. | ||
* **[Recommended]** Visual Studio Code for IntelliSense and debugging, download it from [here](https://code.visualstudio.com/) for free. | ||
* This sample currently uses a free trial Microsoft Cognitive Services key with limited QPS. Please subscribe to the Bing Speech API service [here](https://www.microsoft.com/cognitive-services/en-us/subscriptions) and update the `MicrosoftSpeechApiKey` key in the [Web.config](Web.config) file to try it out further. | ||
|
||
### Usage | ||
|
||
Attach an audio file (wav format) and send an optional command as text. | ||
Supported Commands: | ||
* `WORD` - Counts the number of words. | ||
* `CHARACTER` - Counts the number of characters excluding spaces. | ||
* `SPACE` - Counts the number of spaces. | ||
* `VOWEL` - Counts the number of vowels. | ||
* Any other word will count the occurrences of that word in the transcribed text | ||
|
||
### Code Highlights | ||
|
||
Microsoft Cognitive Services provides a Speech Recognition API to convert audio into text. Check out [Bing Speech API](https://www.microsoft.com/cognitive-services/en-us/speech-api) for a complete reference of Speech APIs available. In this sample we are using the Speech Recognition API using the [REST API](https://www.microsoft.com/cognitive-services/en-us/Speech-api/documentation/API-Reference-REST/BingVoiceRecognition). | ||
|
||
In this sample we are using the API to get the text and send it back to the user. Check out the use of the `MicrosoftCognitiveSpeechService.GetTextFromAudioAsync()` method in the [Controllers/MessagesController](Controllers/MessagesController.cs) class. | ||
````C# | ||
var audioAttachment = activity.Attachments?.FirstOrDefault(a => a.ContentType.Equals("audio/wav")); | ||
if (audioAttachment != null) | ||
{ | ||
using (var client = new HttpClient()) | ||
{ | ||
var stream = await client.GetStreamAsync(audioAttachment.ContentUrl); | ||
var text = await this.speechService.GetTextFromAudioAsync(stream); | ||
message = ProcessText(activity.Text, text); | ||
} | ||
} | ||
```` | ||
|
||
and here is the implementation of `MicrosoftCognitiveSpeechService.GetTextFromAudioAsync()` in [Services/MicrosoftCognitiveSpeechService.cs](Services/MicrosoftCognitiveSpeechService.cs) | ||
````C# | ||
/// <summary> | ||
/// Gets text from an audio stream. | ||
/// </summary> | ||
/// <param name="audiostream"></param> | ||
/// <returns>Transcribed text. </returns> | ||
public async Task<string> GetTextFromAudioAsync(Stream audiostream) | ||
{ | ||
var requestUri = @"https://speech.platform.bing.com/recognize?scenarios=smd&appid=D4D52672-91D7-4C74-8AD8-42B1D98141A5&locale=en-US&device.os=bot&version=3.0&format=json&instanceid=565D69FF-E928-4B7E-87DA-9A750B96D9E3&requestid=" + Guid.NewGuid(); | ||
|
||
using (var client = new HttpClient()) | ||
{ | ||
var token = Authentication.Instance.GetAccessToken(); | ||
client.DefaultRequestHeaders.Add("Authorization", "Bearer " + token.access_token); | ||
|
||
using (var binaryContent = new ByteArrayContent(StreamToBytes(audiostream))) | ||
{ | ||
binaryContent.Headers.TryAddWithoutValidation("content-type", "audio/wav; codec=\"audio/pcm\"; samplerate=16000"); | ||
|
||
var response = await client.PostAsync(requestUri, binaryContent); | ||
var responseString = await response.Content.ReadAsStringAsync(); | ||
dynamic data = JsonConvert.DeserializeObject(responseString); | ||
return data.header.name; | ||
} | ||
} | ||
} | ||
```` | ||
|
||
### Outcome | ||
|
||
You will see the following when connecting the Bot to the Emulator and sending it an audio file and a command: | ||
|
||
Input: | ||
|
||
["What's the weather like?"](audio/whatstheweatherlike.wav) | ||
|
||
Output: | ||
|
||
![Sample Outcome](images/outcome-emulator.png) | ||
|
||
### More Information | ||
|
||
To get more information about how to get started in Bot Builder for .NET and Microsoft Cognitive Services Bing Speech API please review the following resources: | ||
* [Bot Builder for .NET](https://docs.botframework.com/en-us/csharp/builder/sdkreference/index.html) | ||
* [Microsoft Cognitive Services Bing Speech API](https://www.microsoft.com/cognitive-services/en-us/speech-api) |
13 changes: 13 additions & 0 deletions
13
CSharp/intelligence-SpeechToText/Services/AccessTokenInfo.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
namespace SpeechToText.Services
{
    /// <summary>
    /// Plain data holder for an access token and its accompanying fields.
    /// NOTE(review): the snake_case property names presumably mirror the JSON fields of the
    /// token response so it can be deserialized without mapping; confirm against the token client.
    /// </summary>
    public class AccessTokenInfo
    {
        /// <summary>Gets or sets the token value.</summary>
        public string access_token { get; set; }

        /// <summary>Gets or sets the token type.</summary>
        public string token_type { get; set; }

        /// <summary>Gets or sets the expiry value (unit not visible here; presumably seconds, TODO confirm).</summary>
        public int expires_in { get; set; }

        /// <summary>Gets or sets the scope associated with the token.</summary>
        public string scope { get; set; }
    }
}
Oops, something went wrong.