Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/tools/ResultsComparer/CommandLineOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.IO;
using CommandLine;
using CommandLine.Text;

namespace ResultsComparer
{
public class CommandLineOptions
{
[Option("base", HelpText = "Path to the folder/file with base results.")]
public string BasePath { get; set; }
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should perform input validation here. For example,

{
  get => _base;
  set
  {
    if (string.IsNullOrWhiteSpace(value))
      throw new ArgumentException("some message");
    if (!Directory.Exists(value))
      throw new DirectoryNotFoundException("some message");
    // maybe check that the directory has the right files?
    _base = value;
  }
}


[Option("diff", HelpText = "Path to the folder/file with diff results.")]
public string DiffPath { get; set; }

[Option("threshold", Required = true, HelpText = "Threshold for Statistical Test. Examples: 5%, 10ms, 100ns, 1s.")]
public string StatisticalTestThreshold { get; set; }

[Option("noise", HelpText = "Noise threshold for Statistical Test. The difference for 1.0ns and 1.1ns is 10%, but it's just a noise. Examples: 0.5ns 1ns.", Default = "0.3ns" )]
public string NoiseThreshold { get; set; }

[Option("top", HelpText = "Filter the diff to top/bottom N results. Optional.")]
public int? TopCount { get; set; }

[Option("csv", HelpText = "Path to exported CSV results. Optional.")]
public FileInfo CsvPath { get; set; }

/// <summary>
/// Sample invocations rendered in the generated usage/help text.
/// Every sample compares the same two folders with a 5% threshold and
/// differs only in the optional switches it adds.
/// </summary>
[Usage(ApplicationAlias = "")]
public static IEnumerable<Example> Examples
{
    get
    {
        const string baseFolder = @"C:\results\win";
        const string diffFolder = @"C:\results\unix";
        const string threshold = "5%";

        // builds the options shared by all samples; optional switches stay null unless provided
        CommandLineOptions Sample(int? top = null, string noise = null)
            => new CommandLineOptions
            {
                BasePath = baseFolder,
                DiffPath = diffFolder,
                StatisticalTestThreshold = threshold,
                TopCount = top,
                NoiseThreshold = noise
            };

        yield return new Example(
            @"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold.",
            Sample());

        yield return new Example(
            @"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold and show only top/bottom 10 results.",
            Sample(top: 10));

        yield return new Example(
            @"Compare the results stored in 'C:\results\win' (base) vs 'C:\results\unix' (diff) using 5% threshold and 0.5ns noise filter.",
            Sample(noise: "0.5ns"));
    }
}
}
}
133 changes: 133 additions & 0 deletions src/tools/ResultsComparer/DataTransferContracts.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// <auto-generated />

using System.Collections.Generic;
using System.Linq;

namespace DataTransferContracts // generated with http://json2csharp.com/#
{
/// <summary>
/// Data contract wrapping a single <see cref="Hertz"/> value.
/// It is the type of <see cref="HostEnvironmentInfo.ChronometerFrequency"/>.
/// </summary>
public class ChronometerFrequency
{
// NOTE(review): presumably the timer frequency in Hz; the unit is inferred from the name only
public int Hertz { get; set; }
}

/// <summary>
/// Data contract for the host environment section of a result file.
/// It is exposed through <see cref="BdnResult.HostEnvironmentInfo"/>.
/// </summary>
/// <remarks>
/// The processor/core counts and the boolean flags are declared as nullable types,
/// so they can hold <c>null</c> in addition to a value.
/// </remarks>
public class HostEnvironmentInfo
{
public string BenchmarkDotNetCaption { get; set; }
public string BenchmarkDotNetVersion { get; set; }
public string OsVersion { get; set; }
public string ProcessorName { get; set; }
public int? PhysicalProcessorCount { get; set; }
public int? PhysicalCoreCount { get; set; }
public int? LogicalCoreCount { get; set; }
public string RuntimeVersion { get; set; }
public string Architecture { get; set; }
public bool? HasAttachedDebugger { get; set; }
public bool? HasRyuJit { get; set; }
public string Configuration { get; set; }
public string JitModules { get; set; }
public string DotNetCliVersion { get; set; }
public ChronometerFrequency ChronometerFrequency { get; set; }
public string HardwareTimerKind { get; set; }
}

/// <summary>
/// Data contract for the confidence interval attached to <see cref="Statistics.ConfidenceInterval"/>.
/// </summary>
public class ConfidenceInterval
{
public int N { get; set; }
public double Mean { get; set; }
public double StandardError { get; set; }
// NOTE(review): stored as an integer; the encoding of the confidence level is not visible here - confirm against the producer
public int Level { get; set; }
public double Margin { get; set; }
public double Lower { get; set; }
public double Upper { get; set; }
}

/// <summary>
/// Data contract for the fixed set of percentile values attached to <see cref="Statistics.Percentiles"/>.
/// </summary>
public class Percentiles
{
public double P0 { get; set; }
public double P25 { get; set; }
public double P50 { get; set; }
public double P67 { get; set; }
public double P80 { get; set; }
public double P85 { get; set; }
public double P90 { get; set; }
public double P95 { get; set; }
public double P100 { get; set; }
}

/// <summary>
/// Data contract for the summary statistics of a single <see cref="Benchmark"/>
/// (see <see cref="Benchmark.Statistics"/>).
/// </summary>
/// <remarks>
/// This type shares its simple name with <c>BenchmarkDotNet.Mathematics.Statistics</c>,
/// so code importing both namespaces has to fully qualify one of them.
/// </remarks>
public class Statistics
{
public int N { get; set; }
public double Min { get; set; }
public double LowerFence { get; set; }
public double Q1 { get; set; }
public double Median { get; set; }
public double Mean { get; set; }
public double Q3 { get; set; }
public double UpperFence { get; set; }
public double Max { get; set; }
public double InterquartileRange { get; set; }
public List<double> LowerOutliers { get; set; }
public List<double> UpperOutliers { get; set; }
public List<double> AllOutliers { get; set; }
public double StandardError { get; set; }
public double Variance { get; set; }
public double StandardDeviation { get; set; }
public double Skewness { get; set; }
public double Kurtosis { get; set; }
public ConfidenceInterval ConfidenceInterval { get; set; }
public Percentiles Percentiles { get; set; }
}

/// <summary>
/// Data contract for the memory/GC section of a single <see cref="Benchmark"/>
/// (see <see cref="Benchmark.Memory"/>).
/// </summary>
public class Memory
{
public int Gen0Collections { get; set; }
public int Gen1Collections { get; set; }
public int Gen2Collections { get; set; }
// NOTE(review): declared as int while Measurement.Operations is long - confirm the value always fits in 32 bits
public int TotalOperations { get; set; }
public long BytesAllocatedPerOperation { get; set; }
}

/// <summary>
/// Data contract for one entry of <see cref="Benchmark.Measurements"/>.
/// </summary>
/// <remarks>
/// <see cref="Benchmark.GetOriginalValues"/> keeps only the entries whose
/// <see cref="IterationStage"/> equals "Result" and divides <see cref="Nanoseconds"/>
/// by <see cref="Operations"/> for each of them.
/// </remarks>
public class Measurement
{
public string IterationStage { get; set; }
public int LaunchIndex { get; set; }
public int IterationIndex { get; set; }
public long Operations { get; set; }
public double Nanoseconds { get; set; }
}

/// <summary>
/// Data contract for a single benchmark entry of a result file
/// (an element of <see cref="BdnResult.Benchmarks"/>).
/// </summary>
public class Benchmark
{
    public string DisplayInfo { get; set; }
    public object Namespace { get; set; }
    public string Type { get; set; }
    public string Method { get; set; }
    public string MethodTitle { get; set; }
    public string Parameters { get; set; }
    public string FullName { get; set; }
    public Statistics Statistics { get; set; }
    public Memory Memory { get; set; }
    public List<Measurement> Measurements { get; set; }

    /// <summary>
    /// Hand-written helper; unlike the rest of this class it was not produced by the generator.
    /// </summary>
    /// <returns>
    /// the per-operation time (nanoseconds divided by operations) of every actual workload
    /// measurement, i.e. the ones whose stage is "Result" (not warmup, not pilot)
    /// </returns>
    internal double[] GetOriginalValues()
    {
        var perOperationTimes =
            from measurement in Measurements
            where measurement.IterationStage == "Result"
            select measurement.Nanoseconds / measurement.Operations;

        return perOperationTimes.ToArray();
    }
}

/// <summary>
/// Root data contract of a result file: a title, the host environment description
/// and the list of benchmark entries.
/// </summary>
public class BdnResult
{
public string Title { get; set; }
public HostEnvironmentInfo HostEnvironmentInfo { get; set; }
public List<Benchmark> Benchmarks { get; set; }
}
}
14 changes: 14 additions & 0 deletions src/tools/ResultsComparer/NuGet.Config
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<solution>
<add key="disableSourceControlIntegration" value="true" />
</solution>
<packageSources>
<!--To inherit the global NuGet package sources remove the <clear/> line below -->
<clear />

<add key="api.nuget.org" value="https://api.nuget.org/v3/index.json" />
<add key="benchmarkdotnet-ci-feed" value="https://ci.appveyor.com/nuget/benchmarkdotnet" />
<add key="dotnet-core" value="https://dotnet.myget.org/F/dotnet-core/api/v3/index.json" />
</packageSources>
</configuration>
183 changes: 183 additions & 0 deletions src/tools/ResultsComparer/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading;
using BenchmarkDotNet.Mathematics;
using BenchmarkDotNet.Mathematics.StatisticalTesting;
using CommandLine;
using DataTransferContracts;
using MarkdownLog;
using Newtonsoft.Json;

namespace ResultsComparer
{
public class Program
{
private const string FullBdnJsonFileExtension = "full.json";

/// <summary>
/// Entry point: parses the command line and, when parsing succeeds, runs the comparison.
/// </summary>
/// <param name="args">raw command line arguments</param>
public static void Main(string[] args)
{
    // a lot of numbers get printed, so force the invariant culture to keep the output locale-independent
    Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;

    var parsed = Parser.Default.ParseArguments<CommandLineOptions>(args);
    parsed.WithParsed(Compare);
}

/// <summary>
/// Validates both thresholds, computes the benchmarks that differ between base and diff,
/// prints one table for the slower and one for the faster ones, and optionally exports them to CSV.
/// </summary>
/// <param name="args">parsed command line options</param>
private static void Compare(CommandLineOptions args)
{
    bool testThresholdIsValid = Threshold.TryParse(args.StatisticalTestThreshold, out var testThreshold);
    if (!testThresholdIsValid)
    {
        Console.WriteLine($"Invalid Threshold {args.StatisticalTestThreshold}. Examples: 5%, 10ms, 100ns, 1s.");
        return;
    }

    bool noiseThresholdIsValid = Threshold.TryParse(args.NoiseThreshold, out var noiseThreshold);
    if (!noiseThresholdIsValid)
    {
        Console.WriteLine($"Invalid Noise Threshold {args.NoiseThreshold}. Examples: 0.3ns 1ns.");
        return;
    }

    var differences = GetNotSameResults(args, testThreshold, noiseThreshold).ToArray();

    // regressions are printed first, improvements second
    foreach (var conclusion in new[] { EquivalenceTestConclusion.Slower, EquivalenceTestConclusion.Faster })
        PrintTable(differences, conclusion, args);

    ExportToCsv(differences, args.CsvPath);
}

/// <summary>
/// Lazily yields the benchmarks present in both result sets for which the statistical test
/// does not report "Same" for the user threshold nor for the noise threshold.
/// </summary>
/// <param name="args">parsed command line options (provide the base and diff paths)</param>
/// <param name="testThreshold">threshold requested by the user</param>
/// <param name="noiseThreshold">threshold used to filter out differences that are just noise</param>
/// <returns>id, both results and the conclusion obtained for the user threshold</returns>
private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)> GetNotSameResults(CommandLineOptions args, Threshold testThreshold, Threshold noiseThreshold)
{
    foreach (var (id, baseResult, diffResult) in ReadResults(args))
    {
        // a benchmark without statistics has failed, nothing to compare
        if (baseResult.Statistics == null || diffResult.Statistics == null)
            continue;

        var baseValues = baseResult.GetOriginalValues();
        var diffValues = diffResult.GetOriginalValues();

        var userThresholdResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, testThreshold);
        if (userThresholdResult.Conclusion != EquivalenceTestConclusion.Same)
        {
            var noiseResult = StatisticalTestHelper.CalculateTost(MannWhitneyTest.Instance, baseValues, diffValues, noiseThreshold);
            if (noiseResult.Conclusion != EquivalenceTestConclusion.Same)
                yield return (id, baseResult, diffResult, userThresholdResult.Conclusion);
        }
    }
}

/// <summary>
/// Prints a markdown table with the results that match the given conclusion,
/// ordered by ratio (biggest first) and limited to the requested number of rows.
/// </summary>
/// <param name="notSame">all results that were found to differ</param>
/// <param name="conclusion">the conclusion (slower/faster) to print the table for</param>
/// <param name="args">parsed command line options (provide the optional row limit)</param>
private static void PrintTable((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, EquivalenceTestConclusion conclusion, CommandLineOptions args)
{
    const int maxIdLength = 80;

    int rowLimit = args.TopCount ?? int.MaxValue;
    string ratioHeader = conclusion == EquivalenceTestConclusion.Faster ? "base/diff" : "diff/base";

    var rows = notSame
        .Where(entry => entry.conclusion == conclusion)
        .OrderByDescending(entry => GetRatio(conclusion, entry.baseResult, entry.diffResult))
        .Take(rowLimit)
        .Select(entry => new
        {
            Id = entry.id.Length > maxIdLength ? entry.id.Substring(0, maxIdLength) : entry.id,
            DisplayValue = GetRatio(conclusion, entry.baseResult, entry.diffResult),
            BaseMedian = entry.baseResult.Statistics.Median,
            DiffMedian = entry.diffResult.Statistics.Median,
            Modality = GetModalInfo(entry.baseResult) ?? GetModalInfo(entry.diffResult)
        })
        .ToArray();

    var markdown = rows
        .ToMarkdownTable()
        .WithHeaders(conclusion.ToString(), ratioHeader, "Base Median (ns)", "Diff Median (ns)", "Modality")
        .ToMarkdown();

    // the generated lines start with a tab and have no trailing '|', which looks bad, so every line is patched
    foreach (var tableLine in markdown.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries))
        Console.WriteLine($"| {tableLine.TrimStart()}|");

    Console.WriteLine();
}

/// <summary>
/// Reads all base and diff result files and pairs up the benchmarks that exist in both sets.
/// </summary>
/// <param name="args">parsed command line options (provide the base and diff paths)</param>
/// <returns>one entry per benchmark id found in both the base and the diff results</returns>
/// <exception cref="ArgumentException">thrown when either path yields no files to parse</exception>
private static IEnumerable<(string id, Benchmark baseResult, Benchmark diffResult)> ReadResults(CommandLineOptions args)
{
    string[] baseFiles = GetFilesToParse(args.BasePath);
    string[] diffFiles = GetFilesToParse(args.DiffPath);

    if (baseFiles.Length == 0 || diffFiles.Length == 0)
        throw new ArgumentException($"Provided paths contained no {FullBdnJsonFileExtension} files.");

    // building dictionaries (diff first, then base) also guarantees that the ids are unique within each set
    var diffById = diffFiles
        .Select(ReadFromFile)
        .SelectMany(report => report.Benchmarks)
        .ToDictionary(benchmark => benchmark.FullName);

    var baseById = baseFiles
        .Select(ReadFromFile)
        .SelectMany(report => report.Benchmarks)
        .ToDictionary(benchmark => benchmark.FullName);

    return
        from basePair in baseById
        where diffById.ContainsKey(basePair.Key)
        select (id: basePair.Key, baseResult: basePair.Value, diffResult: diffById[basePair.Key]);
}

/// <summary>
/// Writes the raw values of every differing benchmark to a semicolon separated file:
/// one "base" line and one "diff" line per benchmark. Does nothing when no path was provided.
/// </summary>
/// <param name="notSame">all results that were found to differ</param>
/// <param name="csvPath">destination file (replaced when it already exists) or null to skip the export</param>
private static void ExportToCsv((string id, Benchmark baseResult, Benchmark diffResult, EquivalenceTestConclusion conclusion)[] notSame, FileInfo csvPath)
{
    if (csvPath == null)
        return;

    if (csvPath.Exists)
        csvPath.Delete();

    using (var writer = csvPath.CreateText())
    {
        foreach (var (id, baseResult, diffResult, conclusion) in notSame)
        {
            // the id is quoted and embedded quotes are doubled
            string quotedId = $"\"{id.Replace("\"", "\"\"")}\"";

            writer.WriteLine($"{quotedId};base;{conclusion};{string.Join(';', baseResult.GetOriginalValues())}");
            writer.WriteLine($"{quotedId};diff;{conclusion};{string.Join(';', diffResult.GetOriginalValues())}");
        }
    }

    Console.WriteLine($"CSV results exported to {csvPath.FullName}");
}

/// <summary>
/// Resolves a user supplied path to the list of result files that should be parsed.
/// </summary>
/// <param name="path">
/// either a directory (searched recursively for files ending with the expected extension)
/// or a single existing file ending with that extension
/// </param>
/// <returns>the files to parse; may be empty when a directory contains no matching files</returns>
/// <exception cref="ArgumentException">thrown when <paramref name="path"/> is null, empty or whitespace</exception>
/// <exception cref="FileNotFoundException">
/// thrown when <paramref name="path"/> is neither an existing directory nor an existing file with the expected extension
/// </exception>
private static string[] GetFilesToParse(string path)
{
    // the path comes straight from the command line and may be missing; fail with a clear message instead of a NullReferenceException
    if (string.IsNullOrWhiteSpace(path))
        throw new ArgumentException("Path to the results must not be null or empty.", nameof(path));

    if (Directory.Exists(path))
        return Directory.GetFiles(path, $"*{FullBdnJsonFileExtension}", SearchOption.AllDirectories);

    // a single file is accepted only when it exists AND has the expected extension;
    // combining the two checks with "or" let non-existing paths through and made them fail later while reading
    if (File.Exists(path) && path.EndsWith(FullBdnJsonFileExtension, StringComparison.Ordinal))
        return new[] { path };

    throw new FileNotFoundException($"Provided path does NOT exist or is not a {FullBdnJsonFileExtension} file", path);
}

// code and magic values taken from BenchmarkDotNet.Analysers.MultimodalDistributionAnalyzer
// See http://www.brendangregg.com/FrequencyTrails/modes.html
/// <summary>
/// Classifies the distribution of the benchmark values based on its M-value.
/// </summary>
/// <param name="benchmark">benchmark whose values are analysed</param>
/// <returns>
/// "multimodal", "bimodal" or "several?" depending on the M-value;
/// null when there are fewer than 12 values or the M-value does not exceed the lowest limit
/// </returns>
private static string GetModalInfo(Benchmark benchmark)
{
    const int minSampleSize = 12;

    if (benchmark.Statistics.N >= minSampleSize) // otherwise there is not enough data to tell
    {
        var values = new BenchmarkDotNet.Mathematics.Statistics(benchmark.GetOriginalValues());
        double mValue = MathHelper.CalculateMValue(values);

        if (mValue > 4.2)
            return "multimodal";
        if (mValue > 3.2)
            return "bimodal";
        if (mValue > 2.8)
            return "several?";
    }

    return null;
}

private static double GetRatio(EquivalenceTestConclusion conclusion, Benchmark baseResult, Benchmark diffResult)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think my comment got lost in the refactor - why are we reporting straight ratios instead of relative percentages? Typically I expect to see this calculation look something like (diff-base)/base (in the case where a higher number is better.) I'd also like to see slower (regressions) represented as negative deltas instead of reversing the calculation as is done here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the choice between base/diff, diff/base, and (diff-base)/base is very subjective. For example: your preferences are different from @AndyAyersMS's, which are also different from @stephentoub's ;)

What I have learned in BDN is that users want to customize everything, but it typically adds too much complexity to the code.

For example here to keep everyone happy I would need to introduce a new console argument, add docs for it and handle all cases in sorting the results, formatting them and aligning in the table. I don't have time for it, but I would be happy to review a PR if somebody is willing to implement it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's that subjective. Using straight ratios leads to weird things sometimes - going from 10 to 8 by your method would show as 1.25, but I would think of as a -20% regression from 10. Reversing the terms so that smaller ratios are "worse", always dividing by the base, results in 0.8, which is also a non-obvious way to present the data but closer to something that makes sense in terms of how a data point relates to the previous data point. It gets weird in another way when you invert and higher numbers are worse - imagine a working set measurement going from 1235 pages loaded to 2342 pages. Ratio would tell us it is 1.9, while it is a -47% regression.

For this very specific purpose it may not matter much, but I think there is value in a consistent method for reporting data like this which makes sense across contexts. Given that this tool is not likely to be used for things other than manual investigations we can let it lie, but I'd like to take this back up in a more global sense as we move forward.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like I will have to polish up my newsletter explaining all the ways ratios are vastly superior to other comparative measures...

More importantly, though: I always prefer to see things reported as diff/base, whether as a percentage or ratio or whatnot, so a single column sort can order things and we can plot distributions without having to do extra math.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@billwert Different developers have different preferences when it comes down to how their data is represented. In the past developers have asked for base/diff, diff/base, (diff-base)/base, etc. We should just make sure that we are consistent and transparent w.r.t. the way output data is presented.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AndyAyersMS I would like to read that newsletter. (Though I don't quite understand how ratios achieve sorting in ways that deltas do not.)

@jorive

We should just make sure that we are consistent and transparent w.r.t. the way output data is presented.

Agreed. I'm trying to identify which way that should go. :)

=> conclusion == EquivalenceTestConclusion.Faster
? baseResult.Statistics.Median / diffResult.Statistics.Median
: diffResult.Statistics.Median / baseResult.Statistics.Median;

/// <summary>
/// Reads the given file and deserializes its JSON content.
/// </summary>
/// <param name="resultFilePath">path of the result file to read</param>
/// <returns>the deserialized content of the file</returns>
/// <remarks>
/// When the content cannot be parsed or deserialized, the name of the offending file is printed
/// before the exception is rethrown, so the user can tell which of the many input files is broken.
/// </remarks>
private static BdnResult ReadFromFile(string resultFilePath)
{
    try
    {
        return JsonConvert.DeserializeObject<BdnResult>(File.ReadAllText(resultFilePath));
    }
    // JsonException is the common base of JsonSerializationException and JsonReaderException;
    // catching only the former skipped the message for syntactically invalid (e.g. truncated) files
    catch (JsonException)
    {
        Console.WriteLine($"Exception while reading the {resultFilePath} file.");

        throw; // keep the original stack trace
    }
}
}
}
Binary file added src/tools/ResultsComparer/README.md
Binary file not shown.
13 changes: 13 additions & 0 deletions src/tools/ResultsComparer/ResultsComparer.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.1</TargetFramework>
<LangVersion>latest</LangVersion>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.3.0" />
<PackageReference Include="MarkdownLog.NS20" Version="0.10.1" />
<PackageReference Include="Newtonsoft.Json" Version="12.0.1" />
<PackageReference Include="BenchmarkDotNet" Version="0.11.3.889" />
</ItemGroup>
</Project>
Loading