Skip to content

Commit 9daf5ca

Browse files
authored
Improve packed field decoding (#959)
- Inline `_readPacked` manually and `_withLimit` with a pragma to eliminate closure allocation and calls in packed decoding loops. - `_readPacked` is manually inlined as VM's inliner doesn't properly fold constants after the inlinings, see dart-lang/sdk#60068. - Introduce `PbList._addUnchecked` to add to the list without checking the value for validity and list for mutability. - When decoding a packed field, check the list mutability once, instead of for every element. - When decoding a packed scalar field, don't check for value validity. For scalar fields we need to make sure the field value is not null, which is already guaranteed in the call sites as e.g. `input.readDouble` doesn't return nullable. - Sprinkle a bunch of `prefer-inline`s to make sure VM will inline one liners. VM benchmarks before: ``` protobuf_PackedInt32Decoding(RunTimeRaw): 25598.8125 us. protobuf_PackedInt64Decoding(RunTimeRaw): 67932.43333333333 us. protobuf_PackedUint32Decoding(RunTimeRaw): 24668.844444444443 us. protobuf_PackedUint64Decoding(RunTimeRaw): 64615.066666666666 us. protobuf_PackedSint32Decoding(RunTimeRaw): 26037.275 us. protobuf_PackedSint64Decoding(RunTimeRaw): 100819.65 us. protobuf_PackedBoolDecoding(RunTimeRaw): 34733.4 us. protobuf_PackedEnumDecoding(RunTimeRaw): 48379.659999999996 us. ``` VM benchmarks after: ``` protobuf_PackedInt32Decoding(RunTimeRaw): 19653.9 us. protobuf_PackedInt64Decoding(RunTimeRaw): 48627.9 us. protobuf_PackedUint32Decoding(RunTimeRaw): 19279.29090909091 us. protobuf_PackedUint64Decoding(RunTimeRaw): 50681.8 us. protobuf_PackedSint32Decoding(RunTimeRaw): 20271.854545454546 us. protobuf_PackedSint64Decoding(RunTimeRaw): 83777.8 us. protobuf_PackedBoolDecoding(RunTimeRaw): 24850.555555555555 us. protobuf_PackedEnumDecoding(RunTimeRaw): 45205.659999999996 us. ``` Wasm benchmarks before (`-O2`): ``` protobuf_PackedInt32Decoding(RunTimeRaw): 64220.0 us. protobuf_PackedInt64Decoding(RunTimeRaw): 81033.33333333334 us. 
protobuf_PackedUint32Decoding(RunTimeRaw): 60800.0 us. protobuf_PackedUint64Decoding(RunTimeRaw): 82700.0 us. protobuf_PackedSint32Decoding(RunTimeRaw): 72433.33333333334 us. protobuf_PackedSint64Decoding(RunTimeRaw): 142150.0 us. protobuf_PackedBoolDecoding(RunTimeRaw): 27775.0 us. protobuf_PackedEnumDecoding(RunTimeRaw): 43980.0 us. ``` Wasm benchmarks after: ``` protobuf_PackedInt32Decoding(RunTimeRaw): 56050.0 us. protobuf_PackedInt64Decoding(RunTimeRaw): 74633.33333333334 us. protobuf_PackedUint32Decoding(RunTimeRaw): 56525.0 us. protobuf_PackedUint64Decoding(RunTimeRaw): 69400.0 us. protobuf_PackedSint32Decoding(RunTimeRaw): 51925.0 us. protobuf_PackedSint64Decoding(RunTimeRaw): 116250.0 us. protobuf_PackedBoolDecoding(RunTimeRaw): 18427.272727272728 us. protobuf_PackedEnumDecoding(RunTimeRaw): 41600.0 us. ```
1 parent 1aaa332 commit 9daf5ca

File tree

10 files changed

+418
-37
lines changed

10 files changed

+418
-37
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
import 'dart:math';
6+
import 'dart:typed_data';
7+
8+
import 'package:fixnum/fixnum.dart';
9+
import 'package:protobuf_benchmarks/benchmark_base.dart';
10+
import 'package:protobuf_benchmarks/generated/packed_fields.pb.dart';
11+
12+
PackedFields? sink;
13+
14+
/// Measures decoding of a [PackedFields] message whose `packedInt32` field
/// holds one million pseudo-random values.
class PackedInt32DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedInt32DecodingBenchmark() : super('PackedInt32Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // `nextInt` excludes its bound, so values lie in [0, 2147483647).
      source.packedInt32.add(rng.nextInt(2147483647));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
31+
32+
/// Measures decoding of a [PackedFields] message whose `packedInt64` field
/// holds one million pseudo-random values.
class PackedInt64DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedInt64DecodingBenchmark() : super('PackedInt64Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // 4294967296 (2^32) is the largest bound `Random.nextInt` accepts, so
      // the generated values never use more than 32 bits.
      source.packedInt64.add(Int64(rng.nextInt(4294967296)));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
50+
51+
/// Measures decoding of a [PackedFields] message whose `packedUint32` field
/// holds one million pseudo-random values.
class PackedUint32DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedUint32DecodingBenchmark() : super('PackedUint32Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // `nextInt` excludes its bound, so values lie in [0, 4294967295).
      source.packedUint32.add(rng.nextInt(4294967295));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
68+
69+
/// Measures decoding of a [PackedFields] message whose `packedUint64` field
/// holds one million pseudo-random values.
class PackedUint64DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedUint64DecodingBenchmark() : super('PackedUint64Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // 4294967296 (2^32) is the largest bound `Random.nextInt` accepts, so
      // the generated values never use more than 32 bits.
      source.packedUint64.add(Int64(rng.nextInt(4294967296)));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
87+
88+
/// Measures decoding of a [PackedFields] message whose `packedSint32` field
/// holds one million pseudo-random values.
class PackedSint32DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedSint32DecodingBenchmark() : super('PackedSint32Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // `nextInt` excludes its bound, so values lie in [0, 2147483647).
      // NOTE(review): only non-negative inputs are generated here.
      source.packedSint32.add(rng.nextInt(2147483647));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
105+
106+
/// Measures decoding of a [PackedFields] message whose `packedSint64` field
/// holds one million pseudo-random values.
class PackedSint64DecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedSint64DecodingBenchmark() : super('PackedSint64Decoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // 4294967296 (2^32) is the largest bound `Random.nextInt` accepts, so
      // the generated values are non-negative and never use more than 32 bits.
      source.packedSint64.add(Int64(rng.nextInt(4294967296)));
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
124+
125+
/// Measures decoding of a [PackedFields] message whose `packedBool` field
/// holds one million pseudo-random booleans.
class PackedBoolDecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedBoolDecodingBenchmark() : super('PackedBoolDecoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      source.packedBool.add(rng.nextBool());
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
142+
143+
/// Measures decoding of a [PackedFields] message whose `packedEnum` field
/// holds one million pseudo-randomly chosen [Enum] values.
class PackedEnumDecodingBenchmark extends BenchmarkBase {
  /// Serialized message that every [run] decodes; built once in the
  /// constructor.
  late final Uint8List encoded;

  PackedEnumDecodingBenchmark() : super('PackedEnumDecoding') {
    // Fixed seed so every benchmark run decodes the same bytes.
    final rng = Random(123);
    final source = PackedFields();
    final choiceCount = Enum.values.length;
    const elementCount = 1000000;
    for (var added = 0; added < elementCount; added++) {
      // Pick uniformly by position in `Enum.values`, not by enum number.
      final pick = rng.nextInt(choiceCount);
      source.packedEnum.add(Enum.values[pick]);
    }
    encoded = source.writeToBuffer();
  }

  @override
  void run() {
    final decoded = PackedFields();
    decoded.mergeFromBuffer(encoded);
    // Publish the result through the top-level `sink`.
    sink = decoded;
  }
}
161+
162+
/// Runs every packed-field decoding benchmark in turn and reports its result.
void main() {
  // Each benchmark is constructed right before it is reported, so only one
  // encoded input is built at a time.
  PackedInt32DecodingBenchmark().report();
  PackedInt64DecodingBenchmark().report();
  PackedUint32DecodingBenchmark().report();
  PackedUint64DecodingBenchmark().report();
  PackedSint32DecodingBenchmark().report();
  PackedSint64DecodingBenchmark().report();
  PackedBoolDecodingBenchmark().report();
  PackedEnumDecodingBenchmark().report();

  // `int.parse('1')` is 1, so this branch never executes. It presumably exists
  // to keep a visible use of `sink` so that the decoded messages cannot be
  // treated as dead values.
  if (int.parse('1') == 0) {
    print(sink);
  }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
syntax = "proto3";
2+
3+
// Message with one explicitly packed repeated field per element type used by
// the packed decoding benchmarks (field numbers 1 through 8).
message PackedFields {
4+
repeated int32 packedInt32 = 1 [packed = true];
5+
repeated int64 packedInt64 = 2 [packed = true];
6+
repeated uint32 packedUint32 = 3 [packed = true];
7+
repeated uint64 packedUint64 = 4 [packed = true];
8+
repeated sint32 packedSint32 = 5 [packed = true];
9+
repeated sint64 packedSint64 = 6 [packed = true];
10+
repeated bool packedBool = 7 [packed = true];
11+
repeated Enum packedEnum = 8 [packed = true];
12+
}
13+
14+
// Element type of `PackedFields.packedEnum`.
// NOTE(review): the numbers are not contiguous (3 is skipped) — confirm that
// the gap is intentional.
enum Enum {
15+
ENUM_1 = 0;
16+
ENUM_2 = 1;
17+
ENUM_3 = 2;
18+
ENUM_4 = 4;
19+
ENUM_5 = 5;
20+
}

benchmarks/tool/compile_protos.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ SIMPLE_PROTOS=(
1212
"protos/google_message1_proto2.proto"
1313
"protos/google_message1_proto3.proto"
1414
"protos/google_message2.proto"
15+
"protos/packed_fields.proto"
1516
)
1617

1718
set -x

protobuf/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 4.0.1-wip
2+
3+
* Improve packed field decoding performance. ([#959])
4+
5+
[#959]: https://github.com/google/protobuf.dart/pull/959
6+
17
## 4.0.0
28

39
* **Breaking:** The following types and members are now removed:

0 commit comments

Comments
 (0)