-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Created sample for 'ApplyWordEmbedding' API. #3142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Microsoft.ML.Samples.Dynamic
{
    public static class ApplyCustomWordEmbedding
    {
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Create an empty data sample list. The 'ApplyWordEmbedding' API does not require training data as
            // the estimator ('WordEmbeddingEstimator') created by the 'ApplyWordEmbedding' API is not a trainable estimator.
            // The empty list is only needed to pass the input schema to the pipeline.
            var emptySamples = new List<TextData>();

            // Convert the sample list to an empty IDataView.
            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);

            // Write a custom 3-dimensional word embedding model with 4 words.
            // Each line follows the '<word> <float> <float> <float>' pattern.
            // Lines that do not conform to the pattern are ignored.
            var pathToCustomModel = @".\custommodel.txt";
            using (StreamWriter file = new StreamWriter(pathToCustomModel, false))
            {
                file.WriteLine("great 1.0 2.0 3.0");
                file.WriteLine("product -1.0 -2.0 -3.0");
                file.WriteLine("like -1 100.0 -100");
                file.WriteLine("buy 0 0 20");
            }

            // A pipeline for converting text into a 9-dimension word embedding vector using the custom word embedding model.
            // The 'ApplyWordEmbedding' computes the minimum, average and maximum values for each token's embedding vector.
            // Tokens in the 'custommodel.txt' model are represented as 3-dimension vectors.
            // Therefore, the output is of 9 dimensions [min, avg, max].
            //
            // The 'ApplyWordEmbedding' API requires a vector of text as input.
            // The pipeline first normalizes and tokenizes the text, then applies the word embedding transformation.
            var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
                .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
                .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", pathToCustomModel, "Tokens"));

            // Fit to data.
            var textTransformer = textPipeline.Fit(emptyDataView);

            // Create the prediction engine to get the embedding vector from the input text/string.
            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);

            // Call the prediction API to convert the text into an embedding vector.
            var data = new TextData() { Text = "This is a great product. I would like to buy it again." };
            var prediction = predictionEngine.Predict(data);

            // Print the length of the embedding vector.
            Console.WriteLine($"Number of Features: {prediction.Features.Length}");

            // Print the embedding vector.
            Console.Write("Features: ");
            foreach (var f in prediction.Features)
                Console.Write($"{f:F4} ");

            // Expected output:
            //  Number of Features: 9
            //  Features: -1.0000 0.0000 -100.0000 0.0000 34.0000 -25.6667 1.0000 100.0000 20.0000
        }

        // Input schema: the raw text to be embedded.
        public class TextData
        {
            public string Text { get; set; }
        }

        // Output schema: the input text plus the computed embedding features.
        public class TransformedTextData : TextData
        {
            public float[] Features { get; set; }
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.ML.Samples.Dynamic
{
    public static class ApplyWordEmbedding
    {
        public static void Example()
        {
            // Create a new ML context for ML.NET operations. It can be used for exception tracking and logging,
            // and is the source of randomness.
            var mlContext = new MLContext();

            // Create an empty data sample list. 'ApplyWordEmbedding' needs no training data because
            // the estimator it creates ('WordEmbeddingEstimator') is not trainable;
            // the empty list exists purely to communicate the input schema to the pipeline.
            var emptySamples = new List<TextData>();

            // Turn the empty sample list into an empty IDataView.
            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);

            // Build a pipeline that converts text into a 150-dimension embedding vector using the
            // pretrained 'SentimentSpecificWordEmbedding' model.
            // 'ApplyWordEmbedding' computes the per-dimension minimum, average and maximum over each
            // token's embedding vector. Tokens in 'SentimentSpecificWordEmbedding' are 50-dimension
            // vectors, so the output has 150 dimensions: [min, avg, max].
            //
            // 'ApplyWordEmbedding' requires a vector of text as input, so the pipeline first
            // normalizes and tokenizes the text before applying the word embedding transform.
            var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
                .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
                .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", "Tokens",
                    Transforms.Text.WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding));

            // Fit to the (empty) data.
            var textTransformer = textPipeline.Fit(emptyDataView);

            // Create a prediction engine that maps an input text/string to its embedding vector.
            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);

            // Run prediction to convert the text into an embedding vector.
            var data = new TextData() { Text = "This is a great product. I would like to buy it again." };
            var prediction = predictionEngine.Predict(data);

            // Report the embedding vector's length.
            Console.WriteLine($"Number of Features: {prediction.Features.Length}");

            // Report the embedding vector itself.
            Console.Write("Features: ");
            foreach (var f in prediction.Features)
                Console.Write($"{f:F4} ");

            // Expected output:
            //  Number of Features: 150
            //  Features: -1.2489 0.2384 -1.3034 -0.9135 -3.4978 -0.1784 -1.3823 -0.3863 -2.5262 -0.8950 ...
        }

        // Input schema: the raw text to be embedded.
        public class TextData
        {
            public string Text { get; set; }
        }

        // Output schema: the input text plus the computed embedding features.
        public class TransformedTextData : TextData
        {
            public float[] Features { get; set; }
        }
    }
}
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,7 +125,7 @@ public static TextNormalizingEstimator NormalizeText(this TransformsCatalog.Text | |
/// <example> | ||
/// <format type="text/markdown"> | ||
/// <![CDATA[ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Should we delete this file Or did we retain this file because its used for some other API sample ? #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
/// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs)] | ||
/// ]]> | ||
/// </format> | ||
/// </example> | ||
|
@@ -143,7 +143,7 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T | |
/// <example> | ||
/// <format type="text/markdown"> | ||
/// <![CDATA[ | ||
/// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs)] | ||
/// ]]> | ||
/// </format> | ||
/// </example> | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this sample used anywhere ? #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Look at the overloaded version of the `ApplyWordEmbedding` method in TextCatalog. It is referenced as an example there, similar to the other methods.

In reply to: 270986934 [](ancestors = 270986934)