Skip to content

Commit 21b5bb4

Browse files
authored
Created sample for 'ApplyWordEmbedding' API. (#3142)
1 parent 0a2ec3a commit 21b5bb4

File tree

4 files changed

+150
-111
lines changed

4 files changed

+150
-111
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Text;
5+
6+
namespace Microsoft.ML.Samples.Dynamic
7+
{
8+
public static class ApplyCustomWordEmbedding
9+
{
10+
public static void Example()
11+
{
12+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13+
// as well as the source of randomness.
14+
var mlContext = new MLContext();
15+
16+
// Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
17+
// the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
18+
// The empty list is only needed to pass input schema to the pipeline.
19+
var emptySamples = new List<TextData>();
20+
21+
// Convert sample list to an empty IDataView.
22+
var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
23+
24+
// Write a custom 3-dimensional word embedding model with 4 words.
25+
// Each line follows '<word> <float> <float> <float>' pattern.
26+
// Lines that do not conform to the pattern are ignored.
27+
var pathToCustomModel = @".\custommodel.txt";
28+
using (StreamWriter file = new StreamWriter(pathToCustomModel, false))
29+
{
30+
file.WriteLine("great 1.0 2.0 3.0");
31+
file.WriteLine("product -1.0 -2.0 -3.0");
32+
file.WriteLine("like -1 100.0 -100");
33+
file.WriteLine("buy 0 0 20");
34+
}
35+
36+
// A pipeline for converting text into a 9-dimension word embedding vector using the custom word embedding model.
37+
// The 'ApplyWordEmbedding' computes the element-wise minimum, average and maximum over the embedding vectors of the tokens found in the model.
38+
// Tokens in the 'custommodel.txt' model are represented as 3-dimensional vectors.
39+
// Therefore, the output is a 9-dimensional vector [min, avg, max].
40+
//
41+
// The 'ApplyWordEmbedding' API requires a vector of text as input.
42+
// The pipeline first normalizes and tokenizes the text, then applies the word embedding transformation.
43+
var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
44+
.Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
45+
.Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", pathToCustomModel, "Tokens"));
46+
47+
// Fit to data.
48+
var textTransformer = textPipeline.Fit(emptyDataView);
49+
50+
// Create the prediction engine to get the embedding vector from the input text/string.
51+
var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
52+
53+
// Call the prediction API to convert the text into embedding vector.
54+
var data = new TextData() { Text = "This is a great product. I would like to buy it again." };
55+
var prediction = predictionEngine.Predict(data);
56+
57+
// Print the length of the embedding vector.
58+
Console.WriteLine($"Number of Features: {prediction.Features.Length}");
59+
60+
// Print the embedding vector.
61+
Console.Write("Features: ");
62+
foreach (var f in prediction.Features)
63+
Console.Write($"{f:F4} ");
64+
65+
// Expected output:
66+
// Number of Features: 9
67+
// Features: -1.0000 0.0000 -100.0000 0.0000 34.0000 -25.6667 1.0000 100.0000 20.0000
68+
}
69+
70+
public class TextData
71+
{
72+
public string Text { get; set; }
73+
}
74+
75+
public class TransformedTextData : TextData
76+
{
77+
public float[] Features { get; set; }
78+
}
79+
}
80+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace Microsoft.ML.Samples.Dynamic
6+
{
7+
public static class ApplyWordEmbedding
8+
{
9+
public static void Example()
10+
{
11+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
12+
// as well as the source of randomness.
13+
var mlContext = new MLContext();
14+
15+
// Create an empty data sample list. The 'ApplyWordEmbedding' does not require training data as
16+
// the estimator ('WordEmbeddingEstimator') created by 'ApplyWordEmbedding' API is not a trainable estimator.
17+
// The empty list is only needed to pass input schema to the pipeline.
18+
var emptySamples = new List<TextData>();
19+
20+
// Convert sample list to an empty IDataView.
21+
var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
22+
23+
// A pipeline for converting text into a 150-dimension embedding vector using pretrained 'SentimentSpecificWordEmbedding' model.
24+
// The 'ApplyWordEmbedding' computes the element-wise minimum, average and maximum over the tokens' embedding vectors.
25+
// Tokens in the 'SentimentSpecificWordEmbedding' model are represented as 50-dimensional vectors.
26+
// Therefore, the output is a 150-dimensional vector [min, avg, max].
27+
//
28+
// The 'ApplyWordEmbedding' API requires a vector of text as input.
29+
// The pipeline first normalizes and tokenizes the text, then applies the word embedding transformation.
30+
var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
31+
.Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
32+
.Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", "Tokens",
33+
Transforms.Text.WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding));
34+
35+
// Fit to data.
36+
var textTransformer = textPipeline.Fit(emptyDataView);
37+
38+
// Create the prediction engine to get the embedding vector from the input text/string.
39+
var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
40+
41+
// Call the prediction API to convert the text into embedding vector.
42+
var data = new TextData() { Text = "This is a great product. I would like to buy it again." };
43+
var prediction = predictionEngine.Predict(data);
44+
45+
// Print the length of the embedding vector.
46+
Console.WriteLine($"Number of Features: {prediction.Features.Length}");
47+
48+
// Print the embedding vector.
49+
Console.Write("Features: ");
50+
foreach (var f in prediction.Features)
51+
Console.Write($"{f:F4} ");
52+
53+
// Expected output:
54+
// Number of Features: 150
55+
// Features: -1.2489 0.2384 -1.3034 -0.9135 -3.4978 -0.1784 -1.3823 -0.3863 -2.5262 -0.8950 ...
56+
}
57+
58+
public class TextData
59+
{
60+
public string Text { get; set; }
61+
}
62+
63+
public class TransformedTextData : TextData
64+
{
65+
public float[] Features { get; set; }
66+
}
67+
}
68+
}

docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs

Lines changed: 0 additions & 109 deletions
This file was deleted.

src/Microsoft.ML.Transforms/Text/TextCatalog.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ public static TextNormalizingEstimator NormalizeText(this TransformsCatalog.Text
125125
/// <example>
126126
/// <format type="text/markdown">
127127
/// <![CDATA[
128-
/// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs)]
128+
/// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyWordEmbedding.cs)]
129129
/// ]]>
130130
/// </format>
131131
/// </example>
@@ -143,7 +143,7 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T
143143
/// <example>
144144
/// <format type="text/markdown">
145145
/// <![CDATA[
146-
/// [!code-csharp[FeaturizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/WordEmbeddingTransform.cs)]
146+
/// [!code-csharp[ApplyWordEmbedding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ApplyCustomWordEmbedding.cs)]
147147
/// ]]>
148148
/// </format>
149149
/// </example>

0 commit comments

Comments
 (0)