Skip to content

Commit 421e91e

Browse files
committed
review comments
1 parent b23324e commit 421e91e

File tree

6 files changed

+186
-35
lines changed

6 files changed

+186
-35
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/Clustering.ttinclude

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ namespace Samples.Dynamic.Trainers.Clustering
4747
// Convert IDataView object to a list.
4848
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
4949

50-
// Look at 5 predictions
50+
// Print 5 predictions. Note that the label is only used as a comparison with the predicted label.
51+
// It is not used during training.
5152
foreach (var p in predictions.Take(2))
5253
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
5354
foreach (var p in predictions.TakeLast(3))
@@ -57,9 +58,7 @@ namespace Samples.Dynamic.Trainers.Clustering
5758

5859
// Evaluate the overall metrics
5960
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
60-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
61-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
62-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
61+
PrintMetrics(metrics);
6362

6463
<#=ExpectedOutput#>
6564

@@ -85,7 +84,7 @@ namespace Samples.Dynamic.Trainers.Clustering
8584
{
8685
Label = (uint)label,
8786
// Create random features with two clusters.
88-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
87+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
8988
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9089
};
9190
}
@@ -94,6 +93,7 @@ namespace Samples.Dynamic.Trainers.Clustering
9493
// Example with label and 50 feature values. A data set is a collection of such examples.
9594
private class DataPoint
9695
{
96+
// The label is not used during training, just for comparison with the predicted label.
9797
[KeyType(2)]
9898
public uint Label { get; set; }
9999

@@ -104,10 +104,18 @@ namespace Samples.Dynamic.Trainers.Clustering
104104
// Class used to capture predictions.
105105
private class Prediction
106106
{
107-
// Original label.
107+
// Original label (not used during training, just for comparison).
108108
public uint Label { get; set; }
109109
// Predicted label from the trainer.
110110
public uint PredictedLabel { get; set; }
111111
}
112+
113+
// Pretty-print of ClusteringMetrics object.
114+
private static void PrintMetrics(ClusteringMetrics metrics)
115+
{
116+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
117+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
118+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
119+
}
112120
}
113121
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public static void Example()
3636
// Convert IDataView object to a list.
3737
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
3838

39-
// Look at 5 predictions
39+
// Print 5 predictions. Note that the label is only used as a comparison with the predicted label.
40+
// It is not used during training.
4041
foreach (var p in predictions.Take(2))
4142
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
4243
foreach (var p in predictions.TakeLast(3))
@@ -51,9 +52,7 @@ public static void Example()
5152

5253
// Evaluate the overall metrics
5354
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
54-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
55-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
56-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
55+
PrintMetrics(metrics);
5756

5857
// Expected output:
5958
// Normalized Mutual Information: 0.95
@@ -71,8 +70,6 @@ public static void Example()
7170
// Expected output similar to:
7271
// The first 3 coordinates of the first centroid are: (0.6035213, 0.6017533, 0.5964218)
7372
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)
74-
//
75-
// Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior.
7673
}
7774

7875
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
@@ -86,7 +83,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
8683
{
8784
Label = (uint)label,
8885
// Create random features with two clusters.
89-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
86+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
9087
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9188
};
9289
}
@@ -95,6 +92,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
9592
// Example with label and 50 feature values. A data set is a collection of such examples.
9693
private class DataPoint
9794
{
95+
// The label is not used during training, just for comparison with the predicted label.
9896
[KeyType(2)]
9997
public uint Label { get; set; }
10098

@@ -105,10 +103,18 @@ private class DataPoint
105103
// Class used to capture predictions.
106104
private class Prediction
107105
{
108-
// Original label.
106+
// Original label (not used during training, just for comparison).
109107
public uint Label { get; set; }
110108
// Predicted label from the trainer.
111109
public uint PredictedLabel { get; set; }
112110
}
111+
112+
// Pretty-print of ClusteringMetrics object.
113+
private static void PrintMetrics(ClusteringMetrics metrics)
114+
{
115+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
116+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
117+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
118+
}
113119
}
114120
}
Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,27 @@
11
<#@ include file="Clustering.ttinclude"#>
22
<#+
33
string ClassName = "KMeans";
4-
string Trainer = "KMeans";
5-
string TrainerOptions = null;
6-
string InlineTrainerOptions = "numberOfClusters: 2";
7-
int DataSeed = 123;
4+
string Trainer = "KMeans";
5+
string TrainerOptions = null;
6+
string InlineTrainerOptions = "numberOfClusters: 2";
7+
int DataSeed = 123;
88

9-
string OptionsInclude = "";
10-
string Comments = "";
9+
string OptionsInclude = "";
10+
string Comments = "";
1111

12-
string ExpectedOutputPerInstance = @"// Expected output:
12+
string ExpectedOutputPerInstance = @"// Expected output:
1313
// Label: 1, Prediction: 1
1414
// Label: 1, Prediction: 1
1515
// Label: 2, Prediction: 2
1616
// Label: 2, Prediction: 2
1717
// Label: 2, Prediction: 2";
1818

19-
string ExpectedOutput = @"// Expected output:
19+
string ExpectedOutput = @"// Expected output:
2020
// Normalized Mutual Information: 0.95
2121
// Average Distance: 4.17
2222
// Davies Bouldin Index: 2.87";
2323

24-
string ExpectedCentroidsOutput = @"// Expected output similar to:
24+
string ExpectedCentroidsOutput = @"// Expected output similar to:
2525
// The first 3 coordinates of the first centroid are: (0.6035213, 0.6017533, 0.5964218)
26-
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)
27-
//
28-
// Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior.";
26+
// The first 3 coordinates of the second centroid are: (0.4031044, 0.4175443, 0.4082336)";
2927
#>

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ public static void Example()
2626
var options = new KMeansTrainer.Options
2727
{
2828
NumberOfClusters = 2,
29-
MaximumNumberOfIterations = 100,
3029
OptimizationTolerance = 1e-6f,
3130
NumberOfThreads = 1
3231
};
@@ -46,7 +45,8 @@ public static void Example()
4645
// Convert IDataView object to a list.
4746
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
4847

49-
// Look at 5 predictions
48+
// Print 5 predictions. Note that the label is only used as a comparison with the predicted label.
49+
// It is not used during training.
5050
foreach (var p in predictions.Take(2))
5151
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
5252
foreach (var p in predictions.TakeLast(3))
@@ -61,9 +61,7 @@ public static void Example()
6161

6262
// Evaluate the overall metrics
6363
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
64-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
65-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
66-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
64+
PrintMetrics(metrics);
6765

6866
// Expected output:
6967
// Normalized Mutual Information: 0.92
@@ -94,7 +92,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
9492
{
9593
Label = (uint)label,
9694
// Create random features with two clusters.
97-
// The first half has feature values cetered around 0.6 the second half has values centered around 0.4.
95+
// The first half has feature values centered around 0.6; the second half has values centered around 0.4.
9896
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
9997
};
10098
}
@@ -103,6 +101,7 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
103101
// Example with label and 50 feature values. A data set is a collection of such examples.
104102
private class DataPoint
105103
{
104+
// The label is not used during training, just for comparison with the predicted label.
106105
[KeyType(2)]
107106
public uint Label { get; set; }
108107

@@ -113,10 +112,18 @@ private class DataPoint
113112
// Class used to capture predictions.
114113
private class Prediction
115114
{
116-
// Original label.
115+
// Original label (not used during training, just for comparison).
117116
public uint Label { get; set; }
118117
// Predicted label from the trainer.
119118
public uint PredictedLabel { get; set; }
120119
}
120+
121+
// Pretty-print of ClusteringMetrics object.
122+
private static void PrintMetrics(ClusteringMetrics metrics)
123+
{
124+
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
125+
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
126+
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
127+
}
121128
}
122129
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.tt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ string Trainer = "KMeans";
55
string TrainerOptions = @"KMeansTrainer.Options
66
{
77
NumberOfClusters = 2,
8-
MaximumNumberOfIterations = 100,
98
OptimizationTolerance = 1e-6f,
109
NumberOfThreads = 1
1110
}";

0 commit comments

Comments
 (0)