Skip to content

Commit db5d71d

Browse files
committed
Add an example of random PCA
Fix build Polish example
1 parent 129b47c commit db5d71d

File tree

2 files changed

+156
-0
lines changed

2 files changed

+156
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML.Data;
5+
6+
namespace Microsoft.ML.Samples.Dynamic.Trainers.AnomalyDetection
7+
{
8+
public static class RandomizedPcaSample
9+
{
10+
/// <summary>
11+
/// A single example holding a vector of 3 feature values.
12+
/// </summary>
13+
private class DataPoint
14+
{
15+
[VectorType(3)]
16+
public float[] Features;
17+
}
18+
19+
/// <summary>
20+
/// Class used to capture the prediction for a <see cref="DataPoint"/> in <see cref="Example"/>.
21+
/// </summary>
22+
// We disable this warning because the compiler doesn't realize the fields below are assigned somewhere.
23+
#pragma warning disable 649
24+
private class Result
25+
{
26+
// False for an outlier, true for an inlier.
27+
public bool PredictedLabel;
28+
// An outlier gets a smaller score than an inlier.
29+
public float Score;
30+
}
31+
#pragma warning restore 649
32+
33+
/// <summary>
/// Trains a randomized PCA anomaly detector on six in-memory examples, applies it to the same data,
/// and prints for each example whether it was predicted as an inlier or an outlier, together with its score.
/// </summary>
public static void Example()
34+
{
35+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
36+
// as a catalog of available operations and as the source of randomness.
37+
// Setting the seed to a fixed number in this example to make outputs deterministic.
38+
var mlContext = new MLContext(seed: 0);
39+
40+
// Six in-memory examples; the last one lies far away from the other five.
var samples = new List<DataPoint>()
41+
{
42+
new DataPoint(){ Features= new float[3] {1, 0, 0} },
43+
new DataPoint(){ Features= new float[3] {0, 2, 1} },
44+
new DataPoint(){ Features= new float[3] {1, 2, 3} },
45+
new DataPoint(){ Features= new float[3] {0, 1, 0} },
46+
new DataPoint(){ Features= new float[3] {0, 2, 1} },
47+
new DataPoint(){ Features= new float[3] {-100, -50, -100} }
48+
};
49+
50+
// Convert the native C# objects to an IDataView, a format consumable by ML.NET functions.
51+
var data = mlContext.Data.LoadFromEnumerable(samples);
52+
53+
// Create an anomaly detector. Its underlying algorithm is randomized PCA.
54+
var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(featureColumnName: nameof(DataPoint.Features), rank: 1, center: false);
55+
56+
// Train the anomaly detector.
57+
var model = pipeline.Fit(data);
58+
59+
// Apply the trained model on the training data.
60+
var transformed = model.Transform(data);
61+
62+
// Read ML.NET predictions into instances of the C# Result class.
63+
var results = mlContext.Data.CreateEnumerable<Result>(transformed, reuseRowObject: false).ToList();
64+
65+
// Go through all predictions and print one line per example.
66+
for (int i = 0; i < samples.Count; ++i)
67+
{
68+
// The i-th example's prediction result.
69+
var result = results[i];
70+
71+
// The i-th example's feature vector as comma-separated text.
72+
var featuresInText = string.Join(',', samples[i].Features);
73+
74+
if (result.PredictedLabel)
75+
// The i-th sample is predicted as an inlier.
76+
Console.WriteLine("The {0}-th example with features [{1}] is an inlier with a score of being inlier {2}",
77+
i, featuresInText, result.Score);
78+
else
79+
// The i-th sample is predicted as an outlier.
80+
Console.WriteLine("The {0}-th example with features [{1}] is an outlier with a score of being inlier {2}",
81+
i, featuresInText, result.Score);
82+
}
83+
}
84+
}
85+
}

test/Microsoft.ML.Tests/AnomalyDetectionTests.cs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
7+
using System.Linq;
68
using Microsoft.Data.DataView;
79
using Microsoft.ML.Data;
810
using Microsoft.ML.RunTests;
@@ -48,6 +50,75 @@ public void NoAnomalyTest()
4850
Assert.Throws<ArgumentOutOfRangeException>(() => ML.AnomalyDetection.Evaluate(transformedData));
4951
}
5052

53+
/// <summary>
54+
/// A single example holding a vector of 3 feature values.
55+
/// </summary>
56+
private class DataPoint
57+
{
58+
[VectorType(3)]
59+
public float[] Features;
60+
}
61+
62+
/// <summary>
63+
/// Class used to capture the prediction for a <see cref="DataPoint"/> in <see cref="RandomizedPcaInMemory"/>.
64+
/// </summary>
// Warning 649 is disabled for the fields below: the test never assigns them directly and only reads
// them from the objects returned by CreateEnumerable<Result>.
65+
#pragma warning disable 649
66+
private class Result
67+
{
68+
// False for an outlier, true for an inlier.
69+
public bool PredictedLabel;
70+
// An outlier gets a smaller score than an inlier.
71+
public float Score;
72+
}
73+
#pragma warning restore 649
74+
75+
[Fact]
76+
public void RandomizedPcaInMemory()
77+
{
// Trains a randomized PCA anomaly detector on six in-memory examples, applies it to the same data,
// and checks that the first five are predicted as inliers and the last one as an outlier.
78+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
79+
// as a catalog of available operations and as the source of randomness.
80+
// Setting the seed to a fixed number in this example to make outputs deterministic.
81+
var mlContext = new MLContext(seed: 0);
82+
83+
var samples = new List<DataPoint>()
84+
{
85+
new DataPoint(){ Features= new float[3] {1, 0, 0} },
86+
new DataPoint(){ Features= new float[3] {0, 2, 1} },
87+
new DataPoint(){ Features= new float[3] {1, 2, 3} },
88+
new DataPoint(){ Features= new float[3] {0, 1, 0} },
89+
new DataPoint(){ Features= new float[3] {0, 2, 1} },
90+
new DataPoint(){ Features= new float[3] {-100, -50, -100} }
91+
};
92+
93+
// Convert the native C# objects to an IDataView, a format consumable by ML.NET functions.
94+
var data = mlContext.Data.LoadFromEnumerable(samples);
95+
96+
// Create an anomaly detector. Its underlying algorithm is randomized PCA.
97+
var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(featureColumnName: nameof(DataPoint.Features), rank: 1, center: false);
98+
99+
// Train the anomaly detector.
100+
var model = pipeline.Fit(data);
101+
102+
// Apply the trained model on the training data.
103+
var transformed = model.Transform(data);
104+
105+
// Read ML.NET predictions into instances of the C# Result class.
106+
var results = mlContext.Data.CreateEnumerable<Result>(transformed, reuseRowObject: false).ToList();
107+
108+
// The first 5 examples are expected to be inliers.
109+
for (int i = 0; i < 5; ++i)
110+
{
111+
// An inlier should be predicted as true.
112+
Assert.True(results[i].PredictedLabel);
113+
// A higher score means closer to an inlier; inliers are expected in [0.3, 1].
114+
Assert.InRange(results[i].Score, 0.3, 1);
115+
}
116+
117+
// The last example is an outlier. Note that an outlier should be predicted as false,
// with a score expected in [0, 0.3].
118+
Assert.False(results[5].PredictedLabel);
119+
Assert.InRange(results[5].Score, 0, 0.3);
120+
}
121+
51122
private IDataView DetectAnomalyInMnistOneClass(string trainPath, string testPath)
52123
{
53124
var loader = ML.Data.CreateTextLoader(new[]

0 commit comments

Comments
 (0)