1
- using Microsoft . ML ;
1
+ using System ;
2
+ using System . Collections . Generic ;
3
+ using System . Linq ;
4
+ using Microsoft . ML ;
5
+ using Microsoft . ML . Data ;
2
6
using Microsoft . ML . Trainers ;
3
7
4
8
namespace Samples . Dynamic . Trainers . BinaryClassification
5
9
{
6
10
public static class AveragedPerceptronWithOptions
7
11
{
8
- // In this examples we will use the adult income dataset. The goal is to predict
9
- // if a person's income is above $50K or not, based on demographic information about that person.
10
- // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
11
12
public static void Example ( )
12
13
{
13
14
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
14
15
// as a catalog of available operations and as the source of randomness.
15
16
// Setting the seed to a fixed number in this example to make outputs deterministic.
16
17
var mlContext = new MLContext ( seed : 0 ) ;
17
18
18
- // Download and featurize the dataset .
19
- var data = Microsoft . ML . SamplesUtils . DatasetUtils . LoadFeaturizedAdultDataset ( mlContext ) ;
19
+ // Create a list of training data points .
20
+ var dataPoints = GenerateRandomDataPoints ( 1000 ) ;
20
21
21
- // Leave out 10% of data for testing .
22
- var trainTestData = mlContext . Data . TrainTestSplit ( data , testFraction : 0.1 ) ;
22
+ // Convert the list of data points to an IDataView object, which is consumable by ML.NET API .
23
+ var trainingData = mlContext . Data . LoadFromEnumerable ( dataPoints ) ;
23
24
24
- // Define the trainer options.
25
- var options = new AveragedPerceptronTrainer . Options ( )
25
+ // Define trainer options.
26
+ var options = new AveragedPerceptronTrainer . Options
26
27
{
27
28
LossFunction = new SmoothedHingeLoss ( ) ,
28
29
LearningRate = 0.1f ,
@@ -31,25 +32,90 @@ public static void Example()
31
32
NumberOfIterations = 10
32
33
} ;
33
34
34
- // Create data training pipeline .
35
+ // Define the trainer .
35
36
var pipeline = mlContext . BinaryClassification . Trainers . AveragedPerceptron ( options ) ;
36
37
37
- // Fit this pipeline to the training data .
38
- var model = pipeline . Fit ( trainTestData . TrainSet ) ;
38
+ // Train the model .
39
+ var model = pipeline . Fit ( trainingData ) ;
39
40
40
- // Evaluate how the model is doing on the test data.
41
- var dataWithPredictions = model . Transform ( trainTestData . TestSet ) ;
42
- var metrics = mlContext . BinaryClassification . EvaluateNonCalibrated ( dataWithPredictions ) ;
43
- Microsoft . ML . SamplesUtils . ConsoleUtils . PrintMetrics ( metrics ) ;
41
+ // Create testing data. Use different random seed to make it different from training data.
42
+ var testData = mlContext . Data . LoadFromEnumerable ( GenerateRandomDataPoints ( 500 , seed : 123 ) ) ;
43
+
44
+ // Run the model on test data set.
45
+ var transformedTestData = model . Transform ( testData ) ;
46
+
47
+ // Convert IDataView object to a list.
48
+ var predictions = mlContext . Data . CreateEnumerable < Prediction > ( transformedTestData , reuseRowObject : false ) . ToList ( ) ;
49
+
50
+ // Print 5 predictions.
51
+ foreach ( var p in predictions . Take ( 5 ) )
52
+ Console . WriteLine ( $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
44
53
45
54
// Expected output:
46
- // Accuracy: 0.86
47
- // AUC: 0.90
48
- // F1 Score: 0.66
49
- // Negative Precision: 0.89
50
- // Negative Recall: 0.93
51
- // Positive Precision: 0.72
52
- // Positive Recall: 0.61
55
+ // Label: True, Prediction: True
56
+ // Label: False, Prediction: False
57
+ // Label: True, Prediction: True
58
+ // Label: True, Prediction: True
59
+ // Label: False, Prediction: False
60
+
61
+ // Evaluate the overall metrics.
62
+ var metrics = mlContext . BinaryClassification . EvaluateNonCalibrated ( transformedTestData ) ;
63
+ PrintMetrics ( metrics ) ;
64
+
65
+ // Expected output:
66
+ // Accuracy: 0.89
67
+ // AUC: 0.96
68
+ // F1 Score: 0.88
69
+ // Negative Precision: 0.87
70
+ // Negative Recall: 0.92
71
+ // Positive Precision: 0.91
72
+ // Positive Recall: 0.85
73
+ }
74
+
75
// Lazily yields `count` synthetic data points drawn from a pseudo-random generator
// seeded with `seed`, so the same seed always produces the same sequence.
// Every point carries a random boolean label and 50 features correlated with it.
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
{
    const int featureCount = 50;
    var generator = new Random(seed);

    for (int produced = 0; produced < count; produced++)
    {
        // The label is drawn before any feature so the order of random draws is fixed.
        bool isPositive = (float)generator.NextDouble() > 0.5f;

        // Features of false-labeled points are shifted up by a constant; that shift
        // is what makes the feature values correlated with the label.
        var features = new float[featureCount];
        for (int slot = 0; slot < featureCount; slot++)
        {
            float draw = (float)generator.NextDouble();
            features[slot] = isPositive ? draw : draw + 0.1f;
        }

        yield return new DataPoint { Label = isPositive, Features = features };
    }
}
91
+
92
// One example: a boolean label together with a vector of 50 float feature values.
// A data set is a collection of such examples.
private class DataPoint
{
    // Label of this example.
    public bool Label { get; set; }

    // Feature values of this example; the attribute declares the vector size as 50.
    [VectorType(50)]
    public float[] Features { get; set; }
}
99
+
100
// Holds one row of prediction results: the original label next to the predicted one.
private class Prediction
{
    // Original label.
    public bool Label { get; set; }

    // Predicted label from the trainer.
    public bool PredictedLabel { get; set; }
}
108
+
109
// Pretty-print BinaryClassificationMetrics objects.
// Writes each metric to the console on its own line, rounded to two decimal places.
// Numbers are formatted with the invariant culture so the decimal separator is
// always '.', whatever the culture of the machine running the sample; this keeps
// the printed values in the same form as the documented expected output.
// Throws ArgumentNullException when `metrics` is null.
private static void PrintMetrics(BinaryClassificationMetrics metrics)
{
    if (metrics == null)
    {
        throw new ArgumentNullException(nameof(metrics));
    }

    Console.WriteLine(FormattableString.Invariant($"Accuracy: {metrics.Accuracy:F2}"));
    Console.WriteLine(FormattableString.Invariant($"AUC: {metrics.AreaUnderRocCurve:F2}"));
    Console.WriteLine(FormattableString.Invariant($"F1 Score: {metrics.F1Score:F2}"));
    Console.WriteLine(FormattableString.Invariant($"Negative Precision: {metrics.NegativePrecision:F2}"));
    Console.WriteLine(FormattableString.Invariant($"Negative Recall: {metrics.NegativeRecall:F2}"));
    Console.WriteLine(FormattableString.Invariant($"Positive Precision: {metrics.PositivePrecision:F2}"));
    Console.WriteLine(FormattableString.Invariant($"Positive Recall: {metrics.PositiveRecall:F2}"));
}
54
120
}
55
- }
121
+ }
0 commit comments