Skip to content

Commit fc2c2b2

Browse files
committed
review comments
1 parent 558b026 commit fc2c2b2

File tree

7 files changed

+183
-28
lines changed

7 files changed

+183
-28
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/Clustering.ttinclude

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ namespace Samples.Dynamic.Trainers.Clustering
4747
// Convert IDataView object to a list.
4848
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
4949

50-
// Look at 5 predictions
50+
// Print 5 predictions. Note that the label is only used for comparison with the predicted label.
51+
// It is not used during training.
5152
foreach (var p in predictions.Take(2))
5253
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
5354
foreach (var p in predictions.TakeLast(3))
@@ -57,9 +58,7 @@ namespace Samples.Dynamic.Trainers.Clustering
5758

5859
// Evaluate the overall metrics
5960
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
60-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
61-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
62-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
61+
PrintMetrics(metrics);
6362

6463
<#=ExpectedOutput#>
6564

@@ -85,7 +84,7 @@ namespace Samples.Dynamic.Trainers.Clustering
8584
{
8685
Label = (uint)label,
8786
// Create random features with two clusters.
88-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
87+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
8988
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9089
};
9190
}
@@ -94,6 +93,7 @@ namespace Samples.Dynamic.Trainers.Clustering
9493
// Example with label and 50 feature values. A data set is a collection of such examples.
9594
private class DataPoint
9695
{
96+
// The label is not used during training, just for comparison with the predicted label.
9797
[KeyType(2)]
9898
public uint Label { get; set; }
9999

@@ -104,10 +104,18 @@ namespace Samples.Dynamic.Trainers.Clustering
104104
// Class used to capture predictions.
105105
private class Prediction
106106
{
107-
// Original label.
107+
// Original label (not used during training, just for comparison).
108108
public uint Label { get; set; }
109109
// Predicted label from the trainer.
110110
public uint PredictedLabel { get; set; }
111111
}
112+
113+
// Pretty-print of ClusteringMetrics object.
114+
private static void PrintMetrics(ClusteringMetrics metrics)
115+
{
116+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
117+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
118+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
119+
}
112120
}
113121
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public static void Example()
3636
// Convert IDataView object to a list.
3737
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
3838

39-
// Look at 5 predictions
39+
// Print 5 predictions. Note that the label is only used for comparison with the predicted label.
40+
// It is not used during training.
4041
foreach (var p in predictions.Take(2))
4142
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
4243
foreach (var p in predictions.TakeLast(3))
@@ -51,9 +52,7 @@ public static void Example()
5152

5253
// Evaluate the overall metrics
5354
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
54-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
55-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
56-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
55+
PrintMetrics(metrics);
5756

5857
// Expected output:
5958
// Normalized Mutual Information: 0.95
@@ -71,8 +70,6 @@ public static void Example()
7170
// Expected output similar to:
7271
// The first 3 coordinates of the first centroid are: (0.6035213, 0.6017533, 0.5964218)
7372
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)
74-
//
75-
// Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior.
7673
}
7774

7875
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
@@ -86,7 +83,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
8683
{
8784
Label = (uint)label,
8885
// Create random features with two clusters.
89-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
86+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
9087
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9188
};
9289
}
@@ -95,6 +92,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
9592
// Example with label and 50 feature values. A data set is a collection of such examples.
9693
private class DataPoint
9794
{
95+
// The label is not used during training, just for comparison with the predicted label.
9896
[KeyType(2)]
9997
public uint Label { get; set; }
10098

@@ -105,10 +103,18 @@ private class DataPoint
105103
// Class used to capture predictions.
106104
private class Prediction
107105
{
108-
// Original label.
106+
// Original label (not used during training, just for comparison).
109107
public uint Label { get; set; }
110108
// Predicted label from the trainer.
111109
public uint PredictedLabel { get; set; }
112110
}
111+
112+
// Pretty-print of ClusteringMetrics object.
113+
private static void PrintMetrics(ClusteringMetrics metrics)
114+
{
115+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
116+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
117+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
118+
}
113119
}
114120
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.tt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,5 @@ string ClassName = "KMeans";
2323

2424
string ExpectedCentroidsOutput = @"// Expected output similar to:
2525
// The first 3 coordinates of the first centroid are: (0.6035213, 0.6017533, 0.5964218)
26-
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)
27-
//
28-
// Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior.";
26+
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)";
2927
#>

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ public static void Example()
2626
var options = new KMeansTrainer.Options
2727
{
2828
NumberOfClusters = 2,
29-
MaximumNumberOfIterations = 100,
3029
OptimizationTolerance = 1e-6f,
3130
NumberOfThreads = 1
3231
};
@@ -46,7 +45,8 @@ public static void Example()
4645
// Convert IDataView object to a list.
4746
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
4847

49-
// Look at 5 predictions
48+
// Print 5 predictions. Note that the label is only used for comparison with the predicted label.
49+
// It is not used during training.
5050
foreach (var p in predictions.Take(2))
5151
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
5252
foreach (var p in predictions.TakeLast(3))
@@ -61,9 +61,7 @@ public static void Example()
6161

6262
// Evaluate the overall metrics
6363
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
64-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
65-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
66-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
64+
PrintMetrics(metrics);
6765

6866
// Expected output:
6967
// Normalized Mutual Information: 0.92
@@ -94,7 +92,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
9492
{
9593
Label = (uint)label,
9694
// Create random features with two clusters.
97-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
95+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
9896
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9997
};
10098
}
@@ -103,6 +101,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
103101
// Example with label and 50 feature values. A data set is a collection of such examples.
104102
private class DataPoint
105103
{
104+
// The label is not used during training, just for comparison with the predicted label.
106105
[KeyType(2)]
107106
public uint Label { get; set; }
108107

@@ -113,10 +112,18 @@ private class DataPoint
113112
// Class used to capture predictions.
114113
private class Prediction
115114
{
116-
// Original label.
115+
// Original label (not used during training, just for comparison).
117116
public uint Label { get; set; }
118117
// Predicted label from the trainer.
119118
public uint PredictedLabel { get; set; }
120119
}
120+
121+
// Pretty-print of ClusteringMetrics object.
122+
private static void PrintMetrics(ClusteringMetrics metrics)
123+
{
124+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
125+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
126+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
127+
}
121128
}
122129
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.tt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ string Trainer = "KMeans";
55
string TrainerOptions = @"KMeansTrainer.Options
66
{
77
NumberOfClusters = 2,
8-
MaximumNumberOfIterations = 100,
98
OptimizationTolerance = 1e-6f,
109
NumberOfThreads = 1
1110
}";

docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj

Lines changed: 134 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<PublicSign>false</PublicSign>
88
<RootNamespace>Samples</RootNamespace>
99
</PropertyGroup>
10-
10+
1111
<ItemGroup>
1212
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm.StaticPipe\Microsoft.ML.LightGbm.StaticPipe.csproj" />
1313
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
@@ -69,6 +69,14 @@
6969
</ItemGroup>
7070

7171
<ItemGroup>
72+
<None Update="Dynamic\Trainers\BinaryClassification\AveragedPerceptron.tt">
73+
<Generator>TextTemplatingFileGenerator</Generator>
74+
<LastGenOutput>AveragedPerceptron.cs</LastGenOutput>
75+
</None>
76+
<None Update="Dynamic\Trainers\BinaryClassification\AveragedPerceptronWithOptions.tt">
77+
<Generator>TextTemplatingFileGenerator</Generator>
78+
<LastGenOutput>AveragedPerceptronWithOptions.cs</LastGenOutput>
79+
</None>
7280
<None Update="Dynamic\Trainers\BinaryClassification\FastForest.tt">
7381
<Generator>TextTemplatingFileGenerator</Generator>
7482
<LastGenOutput>FastForest.cs</LastGenOutput>
@@ -93,6 +101,66 @@
93101
<Generator>TextTemplatingFileGenerator</Generator>
94102
<LastGenOutput>LbfgsLogisticRegression.cs</LastGenOutput>
95103
</None>
104+
<None Update="Dynamic\Trainers\BinaryClassification\LightGbm.tt">
105+
<Generator>TextTemplatingFileGenerator</Generator>
106+
<LastGenOutput>LightGbm.cs</LastGenOutput>
107+
</None>
108+
<None Update="Dynamic\Trainers\BinaryClassification\LightGbmWithOptions.tt">
109+
<Generator>TextTemplatingFileGenerator</Generator>
110+
<LastGenOutput>LightGbmWithOptions.cs</LastGenOutput>
111+
</None>
112+
<None Update="Dynamic\Trainers\BinaryClassification\LinearSvm.tt">
113+
<Generator>TextTemplatingFileGenerator</Generator>
114+
<LastGenOutput>LinearSvm.cs</LastGenOutput>
115+
</None>
116+
<None Update="Dynamic\Trainers\BinaryClassification\LinearSvmWithOptions.tt">
117+
<Generator>TextTemplatingFileGenerator</Generator>
118+
<LastGenOutput>LinearSvmWithOptions.cs</LastGenOutput>
119+
</None>
120+
<None Update="Dynamic\Trainers\BinaryClassification\PriorTrainer.tt">
121+
<Generator>TextTemplatingFileGenerator</Generator>
122+
<LastGenOutput>PriorTrainer.cs</LastGenOutput>
123+
</None>
124+
<None Update="Dynamic\Trainers\BinaryClassification\SdcaLogisticRegression.tt">
125+
<Generator>TextTemplatingFileGenerator</Generator>
126+
<LastGenOutput>SdcaLogisticRegression.cs</LastGenOutput>
127+
</None>
128+
<None Update="Dynamic\Trainers\BinaryClassification\SdcaLogisticRegressionWithOptions.tt">
129+
<Generator>TextTemplatingFileGenerator</Generator>
130+
<LastGenOutput>SdcaLogisticRegressionWithOptions.cs</LastGenOutput>
131+
</None>
132+
<None Update="Dynamic\Trainers\BinaryClassification\SdcaNonCalibrated.tt">
133+
<Generator>TextTemplatingFileGenerator</Generator>
134+
<LastGenOutput>SdcaNonCalibrated.cs</LastGenOutput>
135+
</None>
136+
<None Update="Dynamic\Trainers\BinaryClassification\SdcaNonCalibratedWithOptions.tt">
137+
<Generator>TextTemplatingFileGenerator</Generator>
138+
<LastGenOutput>SdcaNonCalibratedWithOptions.cs</LastGenOutput>
139+
</None>
140+
<None Update="Dynamic\Trainers\BinaryClassification\SgdCalibrated.tt">
141+
<Generator>TextTemplatingFileGenerator</Generator>
142+
<LastGenOutput>SgdCalibrated.cs</LastGenOutput>
143+
</None>
144+
<None Update="Dynamic\Trainers\BinaryClassification\SgdCalibratedWithOptions.tt">
145+
<Generator>TextTemplatingFileGenerator</Generator>
146+
<LastGenOutput>SgdCalibratedWithOptions.cs</LastGenOutput>
147+
</None>
148+
<None Update="Dynamic\Trainers\BinaryClassification\SgdNonCalibrated.tt">
149+
<Generator>TextTemplatingFileGenerator</Generator>
150+
<LastGenOutput>SgdNonCalibrated.cs</LastGenOutput>
151+
</None>
152+
<None Update="Dynamic\Trainers\BinaryClassification\SgdNonCalibratedWithOptions.tt">
153+
<Generator>TextTemplatingFileGenerator</Generator>
154+
<LastGenOutput>SgdNonCalibratedWithOptions.cs</LastGenOutput>
155+
</None>
156+
<None Update="Dynamic\Trainers\BinaryClassification\SymbolicSgdLogisticRegression.tt">
157+
<Generator>TextTemplatingFileGenerator</Generator>
158+
<LastGenOutput>SymbolicSgdLogisticRegression.cs</LastGenOutput>
159+
</None>
160+
<None Update="Dynamic\Trainers\BinaryClassification\SymbolicSgdLogisticRegressionWithOptions.tt">
161+
<Generator>TextTemplatingFileGenerator</Generator>
162+
<LastGenOutput>SymbolicSgdLogisticRegressionWithOptions.cs</LastGenOutput>
163+
</None>
96164
<None Update="Dynamic\Trainers\Clustering\KMeans.tt">
97165
<Generator>TextTemplatingFileGenerator</Generator>
98166
<LastGenOutput>KMeans.cs</LastGenOutput>
@@ -162,6 +230,71 @@
162230
<AutoGen>True</AutoGen>
163231
<DependentUpon>LbfgsLogisticRegressionWithOptions.tt</DependentUpon>
164232
</Compile>
233+
<Compile Update="Dynamic\Trainers\BinaryClassification\LinearSvm.cs">
234+
<DesignTime>True</DesignTime>
235+
<AutoGen>True</AutoGen>
236+
<DependentUpon>LinearSvm.tt</DependentUpon>
237+
</Compile>
238+
<Compile Update="Dynamic\Trainers\BinaryClassification\LinearSvmWithOptions.cs">
239+
<DesignTime>True</DesignTime>
240+
<AutoGen>True</AutoGen>
241+
<DependentUpon>LinearSvmWithOptions.tt</DependentUpon>
242+
</Compile>
243+
<Compile Update="Dynamic\Trainers\BinaryClassification\PriorTrainer.cs">
244+
<DesignTime>True</DesignTime>
245+
<AutoGen>True</AutoGen>
246+
<DependentUpon>PriorTrainer.tt</DependentUpon>
247+
</Compile>
248+
<Compile Update="Dynamic\Trainers\BinaryClassification\SdcaLogisticRegression.cs">
249+
<DesignTime>True</DesignTime>
250+
<AutoGen>True</AutoGen>
251+
<DependentUpon>SdcaLogisticRegression.tt</DependentUpon>
252+
</Compile>
253+
<Compile Update="Dynamic\Trainers\BinaryClassification\SdcaLogisticRegressionWithOptions.cs">
254+
<DesignTime>True</DesignTime>
255+
<AutoGen>True</AutoGen>
256+
<DependentUpon>SdcaLogisticRegressionWithOptions.tt</DependentUpon>
257+
</Compile>
258+
<Compile Update="Dynamic\Trainers\BinaryClassification\SdcaNonCalibrated.cs">
259+
<DesignTime>True</DesignTime>
260+
<AutoGen>True</AutoGen>
261+
<DependentUpon>SdcaNonCalibrated.tt</DependentUpon>
262+
</Compile>
263+
<Compile Update="Dynamic\Trainers\BinaryClassification\SdcaNonCalibratedWithOptions.cs">
264+
<DesignTime>True</DesignTime>
265+
<AutoGen>True</AutoGen>
266+
<DependentUpon>SdcaNonCalibratedWithOptions.tt</DependentUpon>
267+
</Compile>
268+
<Compile Update="Dynamic\Trainers\BinaryClassification\SgdCalibrated.cs">
269+
<DesignTime>True</DesignTime>
270+
<AutoGen>True</AutoGen>
271+
<DependentUpon>SgdCalibrated.tt</DependentUpon>
272+
</Compile>
273+
<Compile Update="Dynamic\Trainers\BinaryClassification\SgdCalibratedWithOptions.cs">
274+
<DesignTime>True</DesignTime>
275+
<AutoGen>True</AutoGen>
276+
<DependentUpon>SgdCalibratedWithOptions.tt</DependentUpon>
277+
</Compile>
278+
<Compile Update="Dynamic\Trainers\BinaryClassification\SgdNonCalibrated.cs">
279+
<DesignTime>True</DesignTime>
280+
<AutoGen>True</AutoGen>
281+
<DependentUpon>SgdNonCalibrated.tt</DependentUpon>
282+
</Compile>
283+
<Compile Update="Dynamic\Trainers\BinaryClassification\SgdNonCalibratedWithOptions.cs">
284+
<DesignTime>True</DesignTime>
285+
<AutoGen>True</AutoGen>
286+
<DependentUpon>SgdNonCalibratedWithOptions.tt</DependentUpon>
287+
</Compile>
288+
<Compile Update="Dynamic\Trainers\BinaryClassification\SymbolicSgdLogisticRegression.cs">
289+
<DesignTime>True</DesignTime>
290+
<AutoGen>True</AutoGen>
291+
<DependentUpon>SymbolicSgdLogisticRegression.tt</DependentUpon>
292+
</Compile>
293+
<Compile Update="Dynamic\Trainers\BinaryClassification\SymbolicSgdLogisticRegressionWithOptions.cs">
294+
<DesignTime>True</DesignTime>
295+
<AutoGen>True</AutoGen>
296+
<DependentUpon>SymbolicSgdLogisticRegressionWithOptions.tt</DependentUpon>
297+
</Compile>
165298
<Compile Update="Dynamic\Trainers\Clustering\KMeans.cs">
166299
<DesignTime>True</DesignTime>
167300
<AutoGen>True</AutoGen>
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1-
using Samples.Dynamic;
1+
using System;
2+
using Samples.Dynamic;
3+
using Samples.Dynamic.Trainers.Clustering;
24

35
namespace Microsoft.ML.Samples
46
{
57
internal static class Program
68
{
79
static void Main(string[] args)
810
{
9-
CalculateFeatureContribution.Example();
11+
KMeans.Example();
12+
KMeansWithOptions.Example();
13+
Console.ReadLine();
1014
}
1115
}
1216
}

0 commit comments

Comments
 (0)