1 change: 1 addition & 0 deletions packages/web_benchmarks/CHANGELOG.md
@@ -3,6 +3,7 @@
* Add `flutter_frame.total_time`, `flutter_frame.build_time`, and `flutter_frame.raster_time`
metrics to benchmark results. These values are derived from the Flutter `FrameTiming` API.
* Expose a new library `metrics.dart` that contains definitions for the benchmark metrics.
* Add p50, p90, and p95 metrics for benchmark scores.

## 3.0.0

83 changes: 83 additions & 0 deletions packages/web_benchmarks/lib/src/metrics.dart
@@ -50,6 +50,89 @@ enum BenchmarkMetric {
/// from the Blink trace summary.
const String totalUiFrameAverage = 'totalUiFrame.average';

/// Describes the values computed for each [BenchmarkMetric].
sealed class BenchmarkMetricComputation {
const BenchmarkMetricComputation(this.name);

/// The name of each metric computation.
final String name;

/// The name for the computed value tracking the average value of the measured
/// samples without outliers.
static const NamedMetricComputation average =
NamedMetricComputation._('average');

/// The name for the computed value tracking the average of outlier samples.
static const NamedMetricComputation outlierAverage =
NamedMetricComputation._('outlierAverage');

/// The name for the computed value tracking the outlier average divided by
/// the clean average.
static const NamedMetricComputation outlierRatio =
NamedMetricComputation._('outlierRatio');

/// The name for the computed value tracking the noise as a multiple of the
/// [average] value taken from clean samples.
static const NamedMetricComputation noise = NamedMetricComputation._('noise');

/// The name for the computed value tracking the 50th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p50 =
PercentileMetricComputation._('p50', 0.5);

/// The name for the computed value tracking the 90th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p90 =
PercentileMetricComputation._('p90', 0.9);

/// The name for the computed value tracking the 95th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p95 =
PercentileMetricComputation._('p95', 0.95);

/// All of the computed values for each [BenchmarkMetric].
static const List<BenchmarkMetricComputation> values =
<BenchmarkMetricComputation>[
average,
outlierAverage,
outlierRatio,
noise,
p50,
p90,
p95,
];
}

/// A [BenchmarkMetricComputation] with a descriptive name.
final class NamedMetricComputation extends BenchmarkMetricComputation {
const NamedMetricComputation._(super.name);
}

/// A [BenchmarkMetricComputation] describing a percentile (p50, p90, etc.).
final class PercentileMetricComputation extends BenchmarkMetricComputation {
const PercentileMetricComputation._(super.name, this.percentile)
: assert(percentile >= 0.0 && percentile <= 1.0);

/// The percentile value as a double.
///
/// This value must be between 0.0 and 1.0.
final double percentile;

/// The percentile [BenchmarkMetricComputation]s computed for each benchmark
/// metric.
static const List<PercentileMetricComputation> values =
<PercentileMetricComputation>[
BenchmarkMetricComputation.p50,
BenchmarkMetricComputation.p90,
BenchmarkMetricComputation.p95,
];

/// The percentile values as doubles computed for each benchmark metric.
static List<double> percentilesAsDoubles = PercentileMetricComputation.values
.map((PercentileMetricComputation value) => value.percentile)
.toList();
}

/// The list of expected benchmark metrics for the current compilation mode, as
/// determined by the value of [useWasm].
List<BenchmarkMetric> expectedBenchmarkMetrics({required bool useWasm}) {
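These computation names combine with each reported metric to form the score keys in the benchmark results (see the `Profile` serialization changes in `recorder.dart` below). A minimal sketch of the resulting keys, assuming the `flutter_frame.total_time` metric from the changelog entry above:

```dart
import 'package:web_benchmarks/src/metrics.dart';

void main() {
  // Each reported metric yields one score per computation, named
  // '<metric>.<computation>', e.g. 'flutter_frame.total_time.p90'.
  const String metric = 'flutter_frame.total_time';
  for (final BenchmarkMetricComputation computation
      in BenchmarkMetricComputation.values) {
    print('$metric.${computation.name}');
  }
}
```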
44 changes: 37 additions & 7 deletions packages/web_benchmarks/lib/src/recorder.dart
@@ -19,6 +19,7 @@ import 'package:web/web.dart' as html;

import 'common.dart';
import 'metrics.dart';
import 'timeseries.dart';

/// The number of samples from warm-up iterations.
///
@@ -692,6 +693,13 @@ class Timeseries {
final double outlierAverage =
outliers.isNotEmpty ? _computeAverage(name, outliers) : cleanAverage;

// Compute percentile values (e.g. p50, p90, p95).
final Map<double, double> percentiles = computePercentiles(
name,
PercentileMetricComputation.percentilesAsDoubles,
candidateValues,
);

final List<AnnotatedSample> annotatedValues = <AnnotatedSample>[
for (final double warmUpValue in warmUpValues)
AnnotatedSample(
@@ -714,6 +722,7 @@
outlierAverage: outlierAverage,
standardDeviation: standardDeviation,
noise: noise,
percentiles: percentiles,
cleanSampleCount: cleanValues.length,
outlierSampleCount: outliers.length,
samples: annotatedValues,
@@ -747,6 +756,7 @@ class TimeseriesStats {
required this.outlierAverage,
required this.standardDeviation,
required this.noise,
required this.percentiles,
required this.cleanSampleCount,
required this.outlierSampleCount,
required this.samples,
@@ -761,14 +771,21 @@
/// The standard deviation in the measured samples without outliers.
final double standardDeviation;

/// The noise as a multiple of the [average] value takes from clean samples.
/// The noise as a multiple of the [average] value taken from clean samples.
///
/// This value can be multiplied by 100.0 to get noise as a percentage of
/// the average.
///
/// If [average] is zero, the result is treated as a perfect score and zero
/// is returned.
final double noise;

/// The percentile values (p50, p90, p95, etc.) for the measured samples with
/// outliers.
///
/// This [Map] maps percentile targets (e.g. 0.50 for p50, 0.90 for p90,
/// etc.) to the computed value for the [samples].
final Map<double, double> percentiles;

/// The maximum value a sample can have without being considered an outlier.
///
/// See [Timeseries.computeStats] for details on how this value is computed.
@@ -815,6 +832,12 @@ class TimeseriesStats {
buffer.writeln(' | outlier average: $outlierAverage μs');
buffer.writeln(' | outlier/clean ratio: ${outlierRatio}x');
buffer.writeln(' | noise: ${_ratioToPercent(noise)}');
for (final MapEntry<double, double> percentileEntry
in percentiles.entries) {
buffer.writeln(
' | p${(percentileEntry.key * 100).round()}: ${percentileEntry.value} μs',
);
}
return buffer.toString();
}
}
@@ -942,18 +965,25 @@ class Profile {
final Timeseries timeseries = scoreData[key]!;

if (timeseries.isReported) {
scoreKeys.add('$key.average');
scoreKeys.add('$key.${BenchmarkMetricComputation.average.name}');
// Report `outlierRatio` rather than `outlierAverage`, because
// the absolute value of outliers is less interesting than the
// ratio.
scoreKeys.add('$key.outlierRatio');
scoreKeys.add('$key.${BenchmarkMetricComputation.outlierRatio.name}');
}

final TimeseriesStats stats = timeseries.computeStats();
json['$key.average'] = stats.average;
json['$key.outlierAverage'] = stats.outlierAverage;
json['$key.outlierRatio'] = stats.outlierRatio;
json['$key.noise'] = stats.noise;
json['$key.${BenchmarkMetricComputation.average.name}'] = stats.average;
json['$key.${BenchmarkMetricComputation.outlierAverage.name}'] =
stats.outlierAverage;
json['$key.${BenchmarkMetricComputation.outlierRatio.name}'] =
stats.outlierRatio;
json['$key.${BenchmarkMetricComputation.noise.name}'] = stats.noise;
for (final MapEntry<double, double> percentileEntry
in stats.percentiles.entries) {
json['$key.p${(percentileEntry.key * 100).round()}'] =
percentileEntry.value;
}
}

json.addAll(extraData);
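The serialization loop above turns each percentile target into a `p<percent>` suffix on the metric key, matching the computation names defined in `metrics.dart`. A standalone sketch of that naming, using illustrative (made-up) sample values in microseconds:

```dart
void main() {
  // Illustrative percentile results for a single metric key; the values
  // here are fabricated for the example.
  const String key = 'flutter_frame.total_time';
  const Map<double, double> percentiles = <double, double>{
    0.5: 1200.0,
    0.9: 1850.0,
    0.95: 2100.0,
  };
  final Map<String, Object?> json = <String, Object?>{};
  for (final MapEntry<double, double> entry in percentiles.entries) {
    // 0.5 -> 'flutter_frame.total_time.p50', and so on.
    json['$key.p${(entry.key * 100).round()}'] = entry.value;
  }
  print(json);
}
```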
44 changes: 44 additions & 0 deletions packages/web_benchmarks/lib/src/timeseries.dart
@@ -0,0 +1,44 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'package:collection/collection.dart';

// TODO(kenz): move the time series logic from recorder.dart into this file for
// better code separation.

/// Computes the percentile thresholds in [values] for the given [percentiles].
///
/// Each value in [percentiles] should be between 0.0 and 1.0.
///
/// Returns a [Map] of percentile values to the computed value from [values].
Map<double, double> computePercentiles(
String label,
List<double> percentiles,
Iterable<double> values,
) {
if (values.isEmpty) {
throw StateError(
'$label: attempted to compute a percentile of an empty value list.',
);
}
for (final double percentile in percentiles) {
if (percentile < 0.0 || percentile > 1.0) {
throw StateError(
'$label: attempted to compute a percentile for an invalid '
'value: $percentile',
);
}
}

final List<double> sorted =
values.sorted((double a, double b) => a.compareTo(b));
final Map<double, double> computed = <double, double>{};
for (final double percentile in percentiles) {
final int percentileIndex =
(sorted.length * percentile).round().clamp(0, sorted.length - 1);
computed[percentile] = sorted[percentileIndex];
}

return computed;
}
24 changes: 24 additions & 0 deletions packages/web_benchmarks/test/src/timeseries_test.dart
@@ -0,0 +1,24 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'package:flutter_test/flutter_test.dart';
import 'package:web_benchmarks/src/timeseries.dart';

void main() {
group('Timeseries', () {
test('computePercentiles', () {
final Map<double, double> computed = computePercentiles(
'test',
<double>[0.0, 0.5, 0.9, 0.95, 1.0],
List<double>.generate(100, (int i) => i.toDouble()),
);
expect(computed.length, 5);
expect(computed[0.0], 0.0);
expect(computed[0.5], 50.0);
expect(computed[0.9], 90.0);
expect(computed[0.95], 95.0);
expect(computed[1.0], 99.0);
});
});
}
@@ -98,22 +98,19 @@ Future<BenchmarkResults> _runBenchmarks({

for (final String benchmarkName in benchmarkNames) {
for (final String metricName in expectedMetrics) {
for (final String valueName in <String>[
'average',
'outlierAverage',
'outlierRatio',
'noise',
]) {
for (final BenchmarkMetricComputation computation
in BenchmarkMetricComputation.values) {
expect(
taskResult.scores[benchmarkName]!.where((BenchmarkScore score) =>
score.metric == '$metricName.$valueName'),
hasLength(1),
);
taskResult.scores[benchmarkName]!.where((BenchmarkScore score) =>
score.metric == '$metricName.${computation.name}'),
hasLength(1),
reason: 'Expected to find a metric named '
'$metricName.${computation.name}');
}
}
expect(
taskResult.scores[benchmarkName]!.where(
(BenchmarkScore score) => score.metric == 'totalUiFrame.average'),
taskResult.scores[benchmarkName]!
.where((BenchmarkScore score) => score.metric == totalUiFrameAverage),
hasLength(1),
);
}