Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perf improvement for TopK Accuracy and return all topK in Classification Evaluator #5395

Merged
merged 29 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5fbf740
Fix for issue 744
jasallen Sep 8, 2020
1747d3e
cleanup
jasallen Sep 9, 2020
32c244a
fixing report output
jasallen Sep 12, 2020
968b58d
fixedTestReferenceOutputs
jasallen Sep 12, 2020
b7ded43
Fixed test reference outputs for NetCore31
jasallen Sep 12, 2020
685eeb4
change top k acc output string format
jasallen Nov 5, 2020
1eacec7
Ranking algorithm now uses first appearance in dataset rather than wo…
jasallen Nov 6, 2020
ea057ff
fixed benchmark
jasallen Nov 6, 2020
ac08554
various minor changes from code review
jasallen Nov 6, 2020
f0de3ea
limit TopK to OutputTopKAcc parameter
jasallen Nov 6, 2020
30fbd6f
top k output name changes
jasallen Nov 6, 2020
495b4b0
make old TopK readOnly
jasallen Nov 6, 2020
c3afe15
restored old baselineOutputs since respecting outputTopK param means …
jasallen Nov 6, 2020
bfcda22
fix test fails, re-add names parameter
jasallen Nov 6, 2020
563768c
Clean up commented code
jasallen Nov 6, 2020
4a5597a
that'll teach me to edit from the github webpage
jasallen Nov 6, 2020
71390bd
use existing method, fix nits
jasallen Nov 19, 2020
32ab9fa
Slight comment change
jasallen Nov 20, 2020
db2b6b5
Comment change / Touch to kick off build pipeline
jasallen Nov 21, 2020
0d0493b
fix whitespace
jasallen Nov 23, 2020
e6aec98
Merge branch 'master' into jasallenbranch
antoniovs1029 Dec 3, 2020
05e7f91
Added new test
antoniovs1029 Dec 4, 2020
49786ed
Code formatting nits
justinormont Dec 8, 2020
9259031
Code formatting nit
justinormont Dec 8, 2020
98458ba
Fixed undefined rankofCorrectLabel and trailing whitespace warning
antoniovs1029 Dec 8, 2020
86f5c3f
Removed _numUnknownClassInstances and added test for unknown labels
antoniovs1029 Dec 8, 2020
741e9fb
Add weight to seenRanks
antoniovs1029 Dec 8, 2020
dadf793
Nits
antoniovs1029 Dec 9, 2020
9e67751
Removed FastTree import
antoniovs1029 Dec 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.Data
Expand Down Expand Up @@ -81,6 +82,11 @@ public sealed class MulticlassClassificationMetrics
/// </summary>
public int TopKPredictionCount { get; }
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Gets the top-K accuracy for every K from 1 to the number of classes:
/// element K-1 is the fraction of instances whose true label is among the
/// K highest-scored predictions (so element 0 is the ordinary accuracy).
/// </summary>
public IReadOnlyList<double> TopKAccuracyForAllK { get; }

/// <summary>
/// Gets the log-loss of the classifier for each class. Log-loss measures the performance of a classifier
/// with respect to how much the predicted probabilities diverge from the true class label. Lower
Expand Down Expand Up @@ -114,9 +120,10 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
MacroAccuracy = FetchDouble(MulticlassClassificationEvaluator.AccuracyMacro);
LogLoss = FetchDouble(MulticlassClassificationEvaluator.LogLoss);
LogLossReduction = FetchDouble(MulticlassClassificationEvaluator.LogLossReduction);
TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();
jasallen marked this conversation as resolved.
Show resolved Hide resolved
TopKPredictionCount = topKPredictionCount;
if (topKPredictionCount > 0)
TopKAccuracy = FetchDouble(MulticlassClassificationEvaluator.TopKAccuracy);
TopKAccuracy = TopKAccuracyForAllK[topKPredictionCount-1];

var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public sealed class Arguments
public const string AccuracyMicro = "Accuracy(micro-avg)";
public const string AccuracyMacro = "Accuracy(macro-avg)";
public const string TopKAccuracy = "Top K accuracy";
public const string AllTopKAccuracy = "Top K accuracy(All K)";
public const string PerClassLogLoss = "Per class log-loss";
public const string LogLoss = "Log-loss";
public const string LogLossReduction = "Log-loss reduction";
Expand All @@ -60,15 +61,13 @@ public enum Metrics
internal const string LoadName = "MultiClassClassifierEvaluator";

private readonly int? _outputTopKAcc;
private readonly bool _names;
jasallen marked this conversation as resolved.
Show resolved Hide resolved

public MulticlassClassificationEvaluator(IHostEnvironment env, Arguments args)
: base(env, LoadName)
{
Host.AssertValue(args, "args");
Host.CheckUserArg(args.OutputTopKAcc == null || args.OutputTopKAcc > 0, nameof(args.OutputTopKAcc));
_outputTopKAcc = args.OutputTopKAcc;
_names = args.Names;
}

private protected override void CheckScoreAndLabelTypes(RoleMappedSchema schema)
Expand Down Expand Up @@ -147,6 +146,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
var logLoss = new List<double>();
var logLossRed = new List<double>();
var topKAcc = new List<double>();
var allTopK = new List<double[]>();
var perClassLogLoss = new List<double[]>();
var counts = new List<double[]>();
var weights = new List<double[]>();
Expand All @@ -172,6 +172,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
logLossRed.Add(agg.UnweightedCounters.Reduction);
if (agg.UnweightedCounters.OutputTopKAcc > 0)
topKAcc.Add(agg.UnweightedCounters.TopKAccuracy);
allTopK.Add(agg.UnweightedCounters.AllTopKAccuracy);
jasallen marked this conversation as resolved.
Show resolved Hide resolved
perClassLogLoss.Add(agg.UnweightedCounters.PerClassLogLoss);

confStratCol.AddRange(agg.UnweightedCounters.ConfusionTable.Select(x => stratColKey));
Expand All @@ -189,6 +190,7 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
logLossRed.Add(agg.WeightedCounters.Reduction);
if (agg.WeightedCounters.OutputTopKAcc > 0)
topKAcc.Add(agg.WeightedCounters.TopKAccuracy);
allTopK.Add(agg.WeightedCounters.AllTopKAccuracy);
perClassLogLoss.Add(agg.WeightedCounters.PerClassLogLoss);
weights.AddRange(agg.WeightedCounters.ConfusionTable);
}
Expand All @@ -213,6 +215,11 @@ private protected override void GetAggregatorConsolidationFuncs(Aggregator aggre
overallDvBldr.AddColumn(TopKAccuracy, NumberDataViewType.Double, topKAcc.ToArray());
overallDvBldr.AddColumn(PerClassLogLoss, aggregator.GetSlotNames, NumberDataViewType.Double, perClassLogLoss.ToArray());

ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKSlotNames =
jasallen marked this conversation as resolved.
Show resolved Hide resolved
(ref VBuffer<ReadOnlyMemory<char>> dst) =>
dst = new VBuffer<ReadOnlyMemory<char>>(allTopK.First().Length, Enumerable.Range(1,allTopK.First().Length).Select(i=>new ReadOnlyMemory<char>(($"K={i.ToString()}").ToCharArray())).ToArray());
overallDvBldr.AddColumn(AllTopKAccuracy, getKSlotNames, NumberDataViewType.Double, allTopK.ToArray());

var confDvBldr = new ArrayDataViewBuilder(Host);
if (hasStrats)
{
Expand Down Expand Up @@ -246,9 +253,11 @@ public sealed class Counters
private double _totalLogLoss;
private double _numInstances;
private double _numCorrect;
private double _numCorrectTopK;
private int _numUnknownClassInstances;
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
private readonly double[] _sumWeightsOfClass;
private readonly double[] _totalPerClassLogLoss;
private readonly long[] _seenRanks;

public readonly double[][] ConfusionTable;

public double MicroAvgAccuracy { get { return _numInstances > 0 ? _numCorrect / _numInstances : 0; } }
Expand Down Expand Up @@ -291,7 +300,8 @@ public double Reduction
}
}

public double TopKAccuracy { get { return _numInstances > 0 ? _numCorrectTopK / _numInstances : 0; } }
public double TopKAccuracy => !(OutputTopKAcc is null) ? AllTopKAccuracy[OutputTopKAcc.Value] : 0d;
public double[] AllTopKAccuracy => CumulativeSum(_seenRanks.Select(l => l / (double)(_numInstances - _numUnknownClassInstances))).ToArray();

// The per class average log loss is calculated by dividing the weighted sum of the log loss of examples
// in each class by the total weight of examples in that class.
Expand All @@ -316,14 +326,12 @@ public Counters(int numClasses, int? outputTopKAcc)
ConfusionTable = new double[numClasses][];
for (int i = 0; i < ConfusionTable.Length; i++)
ConfusionTable[i] = new double[numClasses];

_seenRanks = new long[numClasses + 1];
}

public void Update(int[] indices, double loglossCurr, int label, float weight)
public void Update(int seenRank, int assigned, double loglossCurr, int label, float weight)
{
Contracts.Assert(Utils.Size(indices) == _numClasses);

int assigned = indices[0];

_numInstances += weight;

if (label < _numClasses)
Expand All @@ -334,23 +342,34 @@ public void Update(int[] indices, double loglossCurr, int label, float weight)
if (label < _numClasses)
_totalPerClassLogLoss[label] += loglossCurr * weight;

if (assigned == label)
_seenRanks[seenRank]++;
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved

if (seenRank == 0) //prediction matched label
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
{
_numCorrect += weight;
ConfusionTable[label][label] += weight;
_numCorrectTopK += weight;
}
else if (label < _numClasses)
{
if (OutputTopKAcc > 0)
{
int idx = Array.IndexOf(indices, label);
if (0 <= idx && idx < OutputTopKAcc)
_numCorrectTopK += weight;
}
ConfusionTable[label][assigned] += weight;
}
else
{
_numUnknownClassInstances++;
}
}

/// <summary>
/// Lazily yields the running (prefix) sum of <paramref name="s"/>: the i-th element
/// of the result is the sum of the first i+1 elements of the input.
/// </summary>
private static IEnumerable<double> CumulativeSum(IEnumerable<double> s)
{
    double sum = 0;
    foreach (var x in s)
    {
        sum += x;
        yield return sum;
    }
}

}

private ValueGetter<float> _labelGetter;
Expand All @@ -359,7 +378,6 @@ public void Update(int[] indices, double loglossCurr, int label, float weight)

private VBuffer<float> _scores;
private readonly float[] _scoresArr;
private int[] _indicesArr;

private const float Epsilon = (float)1e-15;

Expand All @@ -380,6 +398,7 @@ public Aggregator(IHostEnvironment env, ReadOnlyMemory<char>[] classNames, int s
Host.Assert(Utils.Size(classNames) == scoreVectorSize);

_scoresArr = new float[scoreVectorSize];

UnweightedCounters = new Counters(scoreVectorSize, outputTopKAcc);
Weighted = weighted;
WeightedCounters = Weighted ? new Counters(scoreVectorSize, outputTopKAcc) : null;
Expand All @@ -400,6 +419,7 @@ internal override void InitializeNextPass(DataViewRow row, RoleMappedSchema sche

if (schema.Weight.HasValue)
_weightGetter = row.GetGetter<float>(schema.Weight.Value);

}

public override void ProcessRow()
Expand Down Expand Up @@ -437,16 +457,12 @@ public override void ProcessRow()
}
}

// Sort classes by prediction strength.
// Use stable OrderBy instead of Sort(), which may give different results on different machines.
if (Utils.Size(_indicesArr) < _scoresArr.Length)
_indicesArr = new int[_scoresArr.Length];
int j = 0;
foreach (var index in Enumerable.Range(0, _scoresArr.Length).OrderByDescending(i => _scoresArr[i]))
_indicesArr[j++] = index;

var intLabel = (int)label;

var assigned = Array.IndexOf(_scoresArr, _scoresArr.Max()); //perf could be improved

var wasKnownLabel = true;

// log-loss
double logloss;
if (intLabel < _scoresArr.Length)
Expand All @@ -461,11 +477,21 @@ public override void ProcessRow()
// Penalize logloss if the label was not seen during training
logloss = -Math.Log(Epsilon);
_numUnknownClassInstances++;
wasKnownLabel = false;
}

UnweightedCounters.Update(_indicesArr, logloss, intLabel, 1);
// Get the probability that the CORRECT label has: (best case is that it's the highest probability):
var correctProba = !wasKnownLabel ? 0 : _scoresArr[intLabel];

// Find the rank of the *correct* label (in Scores[]). If 0 => Good, correct. And the lower the better.
jasallen marked this conversation as resolved.
Show resolved Hide resolved
// The rank will be from 0 to N. (Not N-1).
jasallen marked this conversation as resolved.
Show resolved Hide resolved
// Problem: What if we have probabilities that are equal to the correct prediction (eg, .6 .1 .1 .1 .1).
// This actually happens a lot with some models. Here we assign the worst rank in the case of a tie (so 4 in this example)
var correctRankWorstCase = !wasKnownLabel ? _scoresArr.Length : _scoresArr.Count(score => score >= correctProba) - 1;

UnweightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, 1);
if (WeightedCounters != null)
WeightedCounters.Update(_indicesArr, logloss, intLabel, weight);
WeightedCounters.Update(correctRankWorstCase, assigned, logloss, intLabel, weight);
}

protected override List<string> GetWarningsCore()
Expand Down Expand Up @@ -909,6 +935,7 @@ private protected override IDataView CombineOverallMetricsCore(IDataView[] metri
for (int i = 0; i < metrics.Length; i++)
{
var idv = metrics[i];
idv = DropAllTopKColumn(idv);
if (!_outputPerClass)
idv = DropPerClassColumn(idv);

Expand Down Expand Up @@ -964,6 +991,15 @@ private IDataView DropPerClassColumn(IDataView input)
return input;
}

/// <summary>
/// Drops the "Top K accuracy(All K)" vector column from <paramref name="input"/> when it is
/// present, so downstream per-fold metric combining only deals with the scalar columns.
/// </summary>
private IDataView DropAllTopKColumn(IDataView input)
{
    // Only the column's existence matters; discard the index instead of binding an
    // unused (and non-camelCase) local.
    if (input.Schema.TryGetColumnIndex(MulticlassClassificationEvaluator.AllTopKAccuracy, out _))
    {
        input = ColumnSelectingTransformer.CreateDrop(Host, input, MulticlassClassificationEvaluator.AllTopKAccuracy);
    }
    return input;
}

public override IEnumerable<MetricColumn> GetOverallMetricColumns()
{
yield return new MetricColumn("AccuracyMicro", MulticlassClassificationEvaluator.AccuracyMicro);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ Accuracy(micro-avg): 0.936709
Accuracy(macro-avg): 0.942857
Log-loss: 0.285741
Log-loss reduction: 0.737254
Top K accuracy(All K)K=1: 0.936709
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

Confusion table
||========================
Expand All @@ -37,6 +41,10 @@ Accuracy(micro-avg): 0.957746
Accuracy(macro-avg): 0.953030
Log-loss: 0.160970
Log-loss reduction: 0.851729
Top K accuracy(All K)K=1: 0.957746
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ Accuracy(micro-avg): 0.936709
Accuracy(macro-avg): 0.942857
Log-loss: 0.285741
Log-loss reduction: 0.737254
Top K accuracy(All K)K=1: 0.936709
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

Confusion table
||========================
Expand All @@ -37,6 +41,10 @@ Accuracy(micro-avg): 0.957746
Accuracy(macro-avg): 0.953030
Log-loss: 0.160970
Log-loss reduction: 0.851729
Top K accuracy(All K)K=1: 0.957746
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Accuracy(micro-avg): 0.973333
Accuracy(macro-avg): 0.973333
Log-loss: 0.161048
Log-loss reduction: 0.853408
Top K accuracy(All K)K=1: 0.973333
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ Accuracy(micro-avg): 0.973333
Accuracy(macro-avg): 0.973333
Log-loss: 0.161048
Log-loss reduction: 0.853408
Top K accuracy(All K)K=1: 0.973333
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Accuracy(micro-avg): 0.629834
Accuracy(macro-avg): 0.500000
Log-loss: 34.538776
Log-loss reduction: -51.407404
Top K accuracy(All K)K=1: 0.629834
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000

Confusion table
||======================
Expand All @@ -29,6 +32,9 @@ Accuracy(micro-avg): 0.682493
Accuracy(macro-avg): 0.500000
Log-loss: 34.538776
Log-loss reduction: -54.264136
Top K accuracy(All K)K=1: 0.682493
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ Accuracy(micro-avg): 0.655222
Accuracy(macro-avg): 0.500000
Log-loss: 34.538776
Log-loss reduction: -52.618809
Top K accuracy(All K)K=1: 0.655222
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Accuracy(micro-avg): 0.962025
Accuracy(macro-avg): 0.965079
Log-loss: 0.129858
Log-loss reduction: 0.880592
Top K accuracy(All K)K=1: 0.962025
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

Confusion table
||========================
Expand All @@ -39,6 +43,10 @@ Accuracy(micro-avg): 0.971831
Accuracy(macro-avg): 0.966667
Log-loss: 0.125563
Log-loss reduction: 0.884343
Top K accuracy(All K)K=1: 0.971831
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Accuracy(micro-avg): 0.980000
Accuracy(macro-avg): 0.980000
Log-loss: 0.095534
Log-loss reduction: 0.913041
Top K accuracy(All K)K=1: 0.980000
Top K accuracy(All K)K=2: 1.000000
Top K accuracy(All K)K=3: 1.000000
Top K accuracy(All K)K=4: 1.000000

OVERALL RESULTS
---------------------------------------
Expand Down
Loading