1 change: 1 addition & 0 deletions packages/web_benchmarks/CHANGELOG.md
@@ -3,6 +3,7 @@
* Add `flutter_frame.total_time`, `flutter_frame.build_time`, and `flutter_frame.raster_time`
metrics to benchmark results. These values are derived from the Flutter `FrameTiming` API.
* Expose a new library `metrics.dart` that contains definitions for the benchmark metrics.
* Add p50, p90, and p95 metrics for benchmark scores.

## 3.0.0

83 changes: 83 additions & 0 deletions packages/web_benchmarks/lib/src/metrics.dart
@@ -50,6 +50,89 @@ enum BenchmarkMetric {
/// from the Blink trace summary.
const String totalUiFrameAverage = 'totalUiFrame.average';

/// Describes the values computed for each [BenchmarkMetric].
sealed class BenchmarkMetricComputation {
const BenchmarkMetricComputation(this.name);

/// The name of each metric computation.
final String name;

/// The name for the computed value tracking the average value of the measured
/// samples without outliers.
static const NamedMetricComputation average =
NamedMetricComputation._('average');

/// The name for the computed value tracking the average of outlier samples.
static const NamedMetricComputation outlierAverage =
NamedMetricComputation._('outlierAverage');

/// The name for the computed value tracking the outlier average divided by
/// the clean average.
static const NamedMetricComputation outlierRatio =
NamedMetricComputation._('outlierRatio');

/// The name for the computed value tracking the noise as a multiple of the
/// [average] value taken from clean samples.
static const NamedMetricComputation noise = NamedMetricComputation._('noise');

/// The name for the computed value tracking the 50th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p50 =
PercentileMetricComputation._('p50', 0.5);

/// The name for the computed value tracking the 90th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p90 =
PercentileMetricComputation._('p90', 0.9);

/// The name for the computed value tracking the 95th percentile value from
/// the samples with outliers.
static const PercentileMetricComputation p95 =
PercentileMetricComputation._('p95', 0.95);

/// All of the computed values for each [BenchmarkMetric].
static const List<BenchmarkMetricComputation> values =
<BenchmarkMetricComputation>[
average,
outlierAverage,
outlierRatio,
noise,
p50,
p90,
p95,
];
}

/// A [BenchmarkMetricComputation] with a descriptive name.
final class NamedMetricComputation extends BenchmarkMetricComputation {
const NamedMetricComputation._(super.name);
}

/// A [BenchmarkMetricComputation] describing a percentile (p50, p90, etc.).
final class PercentileMetricComputation extends BenchmarkMetricComputation {
const PercentileMetricComputation._(super.name, this.percentile)
: assert(percentile >= 0.0 && percentile <= 1.0);

/// The percentile value as a double.
///
/// This value must be between 0.0 and 1.0.
final double percentile;

/// The percentile [BenchmarkMetricComputation]s computed for each benchmark
/// metric.
static const List<PercentileMetricComputation> values =
<PercentileMetricComputation>[
BenchmarkMetricComputation.p50,
BenchmarkMetricComputation.p90,
BenchmarkMetricComputation.p95,
];

/// The percentile values as doubles computed for each benchmark metric.
static List<double> percentilesAsDoubles = PercentileMetricComputation.values
.map((PercentileMetricComputation value) => value.percentile)
.toList();
}

/// The list of expected benchmark metrics for the current compilation mode, as
/// determined by the value of [useWasm].
List<BenchmarkMetric> expectedBenchmarkMetrics({required bool useWasm}) {
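These computation names combine with each reported metric to form the score keys in the benchmark results (see the `Profile` serialization changes in `recorder.dart` below). A minimal sketch of the resulting keys, assuming the `flutter_frame.total_time` metric from the changelog entry above:

```dart
import 'package:web_benchmarks/src/metrics.dart';

void main() {
  // Each reported metric yields one score per computation, named
  // '<metric>.<computation>', e.g. 'flutter_frame.total_time.p90'.
  const String metric = 'flutter_frame.total_time';
  for (final BenchmarkMetricComputation computation
      in BenchmarkMetricComputation.values) {
    print('$metric.${computation.name}');
  }
}
```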
44 changes: 37 additions & 7 deletions packages/web_benchmarks/lib/src/recorder.dart
@@ -19,6 +19,7 @@ import 'package:web/web.dart' as html;

import 'common.dart';
import 'metrics.dart';
import 'timeseries.dart';

/// The number of samples from warm-up iterations.
///
@@ -692,6 +693,13 @@ class Timeseries {
final double outlierAverage =
outliers.isNotEmpty ? _computeAverage(name, outliers) : cleanAverage;

// Compute percentile values (e.g. p50, p90, p95).
final Map<double, double> percentiles = computePercentiles(
name,
PercentileMetricComputation.percentilesAsDoubles,
candidateValues,
);

final List<AnnotatedSample> annotatedValues = <AnnotatedSample>[
for (final double warmUpValue in warmUpValues)
AnnotatedSample(
@@ -714,6 +722,7 @@
outlierAverage: outlierAverage,
standardDeviation: standardDeviation,
noise: noise,
percentiles: percentiles,
cleanSampleCount: cleanValues.length,
outlierSampleCount: outliers.length,
samples: annotatedValues,
@@ -747,6 +756,7 @@ class TimeseriesStats {
required this.outlierAverage,
required this.standardDeviation,
required this.noise,
required this.percentiles,
required this.cleanSampleCount,
required this.outlierSampleCount,
required this.samples,
@@ -761,14 +771,21 @@
/// The standard deviation in the measured samples without outliers.
final double standardDeviation;

/// The noise as a multiple of the [average] value takes from clean samples.
/// The noise as a multiple of the [average] value taken from clean samples.
///
/// This value can be multiplied by 100.0 to get noise as a percentage of
/// the average.
///
/// If [average] is zero, the result is treated as a perfect score and zero
/// is returned.
final double noise;

/// The percentile values (p50, p90, p95, etc.) for the measured samples with
/// outliers.
///
/// This [Map] maps percentile targets (e.g. 0.50 for p50, 0.90 for p90,
/// etc.) to the computed value for the [samples].
final Map<double, double> percentiles;

/// The maximum value a sample can have without being considered an outlier.
///
/// See [Timeseries.computeStats] for details on how this value is computed.
@@ -815,6 +832,12 @@ class TimeseriesStats {
buffer.writeln(' | outlier average: $outlierAverage μs');
buffer.writeln(' | outlier/clean ratio: ${outlierRatio}x');
buffer.writeln(' | noise: ${_ratioToPercent(noise)}');
for (final MapEntry<double, double> percentileEntry
in percentiles.entries) {
buffer.writeln(
' | p${(percentileEntry.key * 100).round()}: ${percentileEntry.value} μs',
);
}
return buffer.toString();
}
}
@@ -942,18 +965,25 @@ class Profile {
final Timeseries timeseries = scoreData[key]!;

if (timeseries.isReported) {
scoreKeys.add('$key.average');
scoreKeys.add('$key.${BenchmarkMetricComputation.average.name}');
// Report `outlierRatio` rather than `outlierAverage`, because
// the absolute value of outliers is less interesting than the
// ratio.
scoreKeys.add('$key.outlierRatio');
scoreKeys.add('$key.${BenchmarkMetricComputation.outlierRatio.name}');
}

final TimeseriesStats stats = timeseries.computeStats();
json['$key.average'] = stats.average;
json['$key.outlierAverage'] = stats.outlierAverage;
json['$key.outlierRatio'] = stats.outlierRatio;
json['$key.noise'] = stats.noise;
json['$key.${BenchmarkMetricComputation.average.name}'] = stats.average;
json['$key.${BenchmarkMetricComputation.outlierAverage.name}'] =
stats.outlierAverage;
json['$key.${BenchmarkMetricComputation.outlierRatio.name}'] =
stats.outlierRatio;
json['$key.${BenchmarkMetricComputation.noise.name}'] = stats.noise;
for (final MapEntry<double, double> percentileEntry
in stats.percentiles.entries) {
json['$key.p${(percentileEntry.key * 100).round()}'] =
percentileEntry.value;
}
}

json.addAll(extraData);
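The serialization loop above turns each percentile target into a `p<percent>` suffix on the metric key, matching the computation names defined in `metrics.dart`. A standalone sketch of that naming, using illustrative (made-up) sample values in microseconds:

```dart
void main() {
  // Illustrative percentile results for a single metric key; the values
  // here are fabricated for the example.
  const String key = 'flutter_frame.total_time';
  const Map<double, double> percentiles = <double, double>{
    0.5: 1200.0,
    0.9: 1850.0,
    0.95: 2100.0,
  };
  final Map<String, Object?> json = <String, Object?>{};
  for (final MapEntry<double, double> entry in percentiles.entries) {
    // 0.5 -> 'flutter_frame.total_time.p50', and so on.
    json['$key.p${(entry.key * 100).round()}'] = entry.value;
  }
  print(json);
}
```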
44 changes: 44 additions & 0 deletions packages/web_benchmarks/lib/src/timeseries.dart
@@ -0,0 +1,44 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'package:collection/collection.dart';

// TODO(kenz): move the time series logic from recorder.dart into this file for
// better code separation.

/// Computes the percentile thresholds in [values] for the given [percentiles].
///
/// Each value in [percentiles] should be between 0.0 and 1.0.
///
/// Returns a [Map] of percentile values to the computed value from [values].
Map<double, double> computePercentiles(
String label,
List<double> percentiles,
Iterable<double> values,
) {
if (values.isEmpty) {
throw StateError(
'$label: attempted to compute a percentile of an empty value list.',
);
}
for (final double percentile in percentiles) {
if (percentile < 0.0 || percentile > 1.0) {
throw StateError(
'$label: attempted to compute a percentile for an invalid '
'value: $percentile',
);
}
}

final List<double> sorted =
values.sorted((double a, double b) => a.compareTo(b));
final Map<double, double> computed = <double, double>{};
for (final double percentile in percentiles) {
final int percentileIndex =
(sorted.length * percentile).round().clamp(0, sorted.length - 1);
computed[percentile] = sorted[percentileIndex];
}

return computed;
}
24 changes: 24 additions & 0 deletions packages/web_benchmarks/test/src/timeseries_test.dart
@@ -0,0 +1,24 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'package:flutter_test/flutter_test.dart';
import 'package:web_benchmarks/src/timeseries.dart';

void main() {
group('Timeseries', () {
test('computePercentiles', () {
final Map<double, double> computed = computePercentiles(
'test',
<double>[0.0, 0.5, 0.9, 0.95, 1.0],
List<double>.generate(100, (int i) => i.toDouble()),
);
expect(computed.length, 5);
expect(computed[0.0], 0.0);
expect(computed[0.5], 50.0);
expect(computed[0.9], 90.0);
expect(computed[0.95], 95.0);
expect(computed[1.0], 99.0);
});
});
}
@@ -98,22 +98,19 @@ Future<BenchmarkResults> _runBenchmarks({

for (final String benchmarkName in benchmarkNames) {
for (final String metricName in expectedMetrics) {
for (final String valueName in <String>[
'average',
'outlierAverage',
'outlierRatio',
'noise',
]) {
for (final BenchmarkMetricComputation computation
in BenchmarkMetricComputation.values) {
expect(
taskResult.scores[benchmarkName]!.where((BenchmarkScore score) =>
score.metric == '$metricName.$valueName'),
hasLength(1),
);
taskResult.scores[benchmarkName]!.where((BenchmarkScore score) =>
score.metric == '$metricName.${computation.name}'),
hasLength(1),
reason: 'Expected to find a metric named '
'$metricName.${computation.name}');
}
}
expect(
taskResult.scores[benchmarkName]!.where(
(BenchmarkScore score) => score.metric == 'totalUiFrame.average'),
taskResult.scores[benchmarkName]!
.where((BenchmarkScore score) => score.metric == totalUiFrameAverage),
hasLength(1),
);
}