Commit 6a413ed

Perf improvement for TopK Accuracy and return all topK in Classification Evaluator (#5395)

* Fix for issue 744

* cleanup

* fixing report output

* fixedTestReferenceOutputs

* Fixed test reference outputs for NetCore31

* change top k acc output string format

* Ranking algorithm now uses first appearance in dataset rather than worstCase

* fixed benchmark

* various minor changes from code review

* limit TopK to OutputTopKAcc parameter

* top k output name changes

* make old TopK readOnly

* restored old baselineOutputs since respecting outputTopK param means no topK in most test output

* fix test fails, re-add names parameter

* Clean up commented code

* that'll teach me to edit from the github webpage

* use existing method, fix nits

* Slight comment change

* Comment change / Touch to kick off build pipeline

* fix whitespace

* Added new test

* Code formatting nits

* Code formatting nit

* Fixed undefined rankofCorrectLabel and trailing whitespace warning

* Removed _numUnknownClassInstances and added test for unknown labels

* Add weight to seenRanks

* Nits

* Removed FastTree import

Co-authored-by: Antonio Velazquez <anvelazq@microsoft.com>
Co-authored-by: Justin Ormont <justinormont@users.noreply.github.com>
3 people committed Dec 9, 2020
1 parent 0c6238e commit 6a413ed
Showing 9 changed files with 203 additions and 63 deletions.
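For orientation, here is a minimal consumption sketch of the surface this commit changes. The helper name ReportTopK and the scoredData argument are hypothetical stand-ins; only Evaluate's topKPredictionCount parameter and the TopKAccuracy / TopKAccuracyForAllK properties shown in the diff below come from this change.

using System;
using Microsoft.ML;
using Microsoft.ML.Data;

static void ReportTopK(MLContext mlContext, IDataView scoredData)
{
    // "scoredData" is assumed to hold predictions from a trained multiclass model.
    var metrics = mlContext.MulticlassClassification.Evaluate(scoredData, topKPredictionCount: 5);

    // TopKAccuracy is kept for compatibility; it now reads the last entry of TopKAccuracyForAllK.
    Console.WriteLine($"Top-5 accuracy: {metrics.TopKAccuracy:F4}");

    // New in this change: top-K accuracy for every K from 1 through TopKPredictionCount.
    for (int k = 1; k <= metrics.TopKPredictionCount; k++)
        Console.WriteLine($"Top-{k} accuracy: {metrics.TopKAccuracyForAllK[k - 1]:F4}");
}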
@@ -123,8 +123,7 @@ private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetric
logLoss: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLoss)),
logLossReduction: GetAverageOfNonNaNScores(newMetrics.Select(x => x.LogLossReduction)),
topKPredictionCount: newMetrics.ElementAt(0).TopKPredictionCount,
topKAccuracy: GetAverageOfNonNaNScores(newMetrics.Select(x => x.TopKAccuracy)),
// Return PerClassLogLoss and ConfusionMatrix from the fold closest to average score
topKAccuracies: GetAverageOfNonNaNScoresInNestedEnumerable(newMetrics.Select(x => x.TopKAccuracyForAllK)),
perClassLogLoss: (metricsClosestToAvg as MulticlassClassificationMetrics).PerClassLogLoss.ToArray(),
confusionMatrix: (metricsClosestToAvg as MulticlassClassificationMetrics).ConfusionMatrix);
return result as TMetrics;
@@ -163,7 +162,6 @@ private static double[] GetAverageOfNonNaNScoresInNestedEnumerable(IEnumerable<I
double[] arr = new double[results.ElementAt(0).Count()];
for (int i = 0; i < arr.Length; i++)
{
Contracts.Assert(arr.Length == results.ElementAt(i).Count());
arr[i] = GetAverageOfNonNaNScores(results.Select(x => x.ElementAt(i)));
}
return arr;
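To make the fold averaging concrete, here is an illustrative standalone sketch (not the library's code) of what GetAverageOfNonNaNScoresInNestedEnumerable computes when given the per-fold TopKAccuracyForAllK vectors: each slot is averaged across folds, and a fold's NaN entry is skipped for that slot only.

using System.Linq;

internal static class FoldAveragingSketch
{
    // Element-wise average of per-fold metric vectors; NaN entries are skipped per slot.
    public static double[] AverageAcrossFolds(double[][] perFoldTopK)
    {
        var result = new double[perFoldTopK[0].Length];
        for (int i = 0; i < result.Length; i++)
        {
            var valid = perFoldTopK.Select(fold => fold[i]).Where(v => !double.IsNaN(v)).ToArray();
            result[i] = valid.Length > 0 ? valid.Average() : double.NaN;
        }
        return result;
    }
}

// Example: three folds, each reporting top-1..top-3 accuracy; the NaN top-1
// entry in the third fold is ignored for that slot.
// AverageAcrossFolds(new[]
// {
//     new[] { 0.70, 0.85, 0.95 },
//     new[] { 0.65, 0.80, 0.90 },
//     new[] { double.NaN, 0.90, 1.00 },
// })  ->  { 0.675, 0.85, 0.95 }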
8 changes: 7 additions & 1 deletion src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs
@@ -1035,7 +1035,13 @@ private static List<string> GetMetricNames(IChannel ch, DataViewSchema schema, D
names = editor.Commit();
}
foreach (var name in names.Items(all: true))
metricNames.Add(string.Format("{0}{1}", metricName, name.Value));
{
var tryNaming = string.Format(metricName, name.Value);
if (tryNaming == metricName) // metricName wasn't a format string, so just append slotname
tryNaming = (string.Format("{0}{1}", metricName, name.Value));

metricNames.Add(tryNaming);
}
}
}
ch.Assert(metricNames.Count == metricCount);
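The new loop above lets a metric name double as a format string for its slot names. A short sketch of the rule follows; the metric-name strings are hypothetical examples, not the evaluator's actual constants.

// Mirrors the naming rule in GetMetricNames above (hypothetical metric names).
static string NameMetricSlot(string metricName, string slotName)
{
    var tryNaming = string.Format(metricName, slotName);
    if (tryNaming == metricName)       // metricName had no {0} placeholder...
        return metricName + slotName;  // ...so append: "LogLoss" + "class0" -> "LogLossclass0"
    return tryNaming;                  // "Top K accuracy for K={0}" + "3" -> "Top K accuracy for K=3"
}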
@@ -2,8 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.Data
@@ -71,16 +73,22 @@ public sealed class MulticlassClassificationMetrics
public double MicroAccuracy { get; }

/// <summary>
/// If <see cref="TopKPredictionCount"/> is positive, this is the relative number of examples where
/// the true label is one of the top-k predicted labels by the predictor.
/// Convenience method for "TopKAccuracyForAllK[TopKPredictionCount - 1]". If <see cref="TopKPredictionCount"/> is positive,
/// this is the relative number of examples where
/// the true label is one of the top K predicted labels by the predictor.
/// </summary>
public double TopKAccuracy { get; }
public double TopKAccuracy => TopKAccuracyForAllK?.LastOrDefault() ?? 0;

/// <summary>
/// If positive, this indicates the K in <see cref="TopKAccuracy"/>.
/// If positive, this indicates the K in <see cref="TopKAccuracy"/> and <see cref="TopKAccuracyForAllK"/>.
/// </summary>
public int TopKPredictionCount { get; }

/// <summary>
/// Returns the top K accuracy for all K from 1 to the value of TopKPredictionCount.
/// </summary>
public IReadOnlyList<double> TopKAccuracyForAllK { get; }

/// <summary>
/// Gets the log-loss of the classifier for each class. Log-loss measures the performance of a classifier
/// with respect to how much the predicted probabilities diverge from the true class label. Lower
@@ -115,29 +123,30 @@ internal MulticlassClassificationMetrics(IHost host, DataViewRow overallResult,
LogLoss = FetchDouble(MulticlassClassificationEvaluator.LogLoss);
LogLossReduction = FetchDouble(MulticlassClassificationEvaluator.LogLossReduction);
TopKPredictionCount = topKPredictionCount;

if (topKPredictionCount > 0)
TopKAccuracy = FetchDouble(MulticlassClassificationEvaluator.TopKAccuracy);
TopKAccuracyForAllK = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.AllTopKAccuracy).DenseValues().ToImmutableArray();

var perClassLogLoss = RowCursorUtils.Fetch<VBuffer<double>>(host, overallResult, MulticlassClassificationEvaluator.PerClassLogLoss);
PerClassLogLoss = perClassLogLoss.DenseValues().ToImmutableArray();
ConfusionMatrix = MetricWriter.GetConfusionMatrix(host, confusionMatrix, binary: false, perClassLogLoss.Length);
}

internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss)
int topKPredictionCount, double[] topKAccuracies, double[] perClassLogLoss)
{
MicroAccuracy = accuracyMicro;
MacroAccuracy = accuracyMacro;
LogLoss = logLoss;
LogLossReduction = logLossReduction;
TopKPredictionCount = topKPredictionCount;
TopKAccuracy = topKAccuracy;
TopKAccuracyForAllK = topKAccuracies;
PerClassLogLoss = perClassLogLoss.ToImmutableArray();
}

internal MulticlassClassificationMetrics(double accuracyMicro, double accuracyMacro, double logLoss, double logLossReduction,
int topKPredictionCount, double topKAccuracy, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
: this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracy, perClassLogLoss)
int topKPredictionCount, double[] topKAccuracies, double[] perClassLogLoss, ConfusionMatrix confusionMatrix)
: this(accuracyMicro, accuracyMacro, logLoss, logLossReduction, topKPredictionCount, topKAccuracies, perClassLogLoss)
{
ConfusionMatrix = confusionMatrix;
}
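The commit messages reference rankofCorrectLabel, seenRanks, and per-example weights. The following is a hedged reconstruction of that general bookkeeping, an assumption-laden sketch rather than the evaluator's actual code: rank each example's true label once, accumulate weighted rank counts, then derive every top-K accuracy in one cumulative pass, which is where a performance win over recomputing each K independently would come from.

internal static class TopKAccuracySketch
{
    // Rank of the true label among the scores, 0-based. Ties break by first
    // appearance: an equal score at a smaller index outranks the true label.
    public static int RankOfCorrectLabel(float[] scores, int trueLabel)
    {
        int rank = 0;
        for (int i = 0; i < scores.Length; i++)
        {
            if (scores[i] > scores[trueLabel] ||
                (scores[i] == scores[trueLabel] && i < trueLabel))
                rank++;
        }
        return rank; // rank < K means the true label is in the top K
    }

    // seenRanks[r] holds the total weight of examples whose true label landed
    // at rank r; one cumulative pass yields top-K accuracy for every K at once.
    public static double[] TopKAccuracyForAllK(double[] seenRanks, double totalWeight, int maxK)
    {
        var accuracies = new double[maxK];
        double cumulative = 0;
        for (int k = 0; k < maxK; k++)
        {
            if (k < seenRanks.Length)
                cumulative += seenRanks[k];
            accuracies[k] = cumulative / totalWeight;
        }
        return accuracies;
    }
}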