Skip to content

Commit 6f0944c

Browse files
authored
Fix oneof serialization with proto3 field presence (#292)
= Description The serialization of a oneof message that contains a message with fields with explicit presence was buggy. For example: ``` message A { oneof kind { B b = 1; C c = 2; } } message B {} message C { optional bool z = 1; } ``` Serializing `A(b=B())` would lead to this payload: ``` 0A # tag1, length delimited 00 # length: 0 12 # tag2, length delimited 00 # length: 0 ``` Which when deserialized, leads to the message `A(c=C())`. = Explanation The issue lies in the post_init method. All fields are introspected, and if different from PLACEHOLDER, the message is marked as having been "serialized_on_wire". Then, when serializing `A(b=B())`, we go through each field of the oneof: - field 'b': this is the selected field from the group, so it is serialized - field 'c': marked as 'serialized_on_wire', so it is added as well. = Fix The issue is that support for explicit presence changed the default value from PLACEHOLDER to None. This breaks the post_init method in that case, which is relatively easy to fix: if a field is optional, and set to None, this is considered as the default value (which it is). This fix however has a side-effect: the group_current for this field (the oneof trick for explicit presence) is no longer set. This changes the behavior when serializing the message in JSON: as the value is the default one (None), and the group is not set (which would force the serialization of the field), so None fields are no longer serialized in JSON. This break one test, and will be fixed in the next commit. * fix: do not serialize None fields in JSON format This is linked to the fix from the previous commit: after it, scalar None fields were not included in the JSON format, but some were still included. This is all cleaned up: None fields are not added in JSON by default, as they indicate the default value of fields with explicit presence. However, if `include_default_values is set, they are included.
1 parent 4455175 commit 6f0944c

File tree

7 files changed

+107
-23
lines changed

7 files changed

+107
-23
lines changed

src/betterproto/__init__.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ class FieldMetadata:
145145
group: Optional[str] = None
146146
# Describes the wrapped type (e.g. when using google.protobuf.BoolValue)
147147
wraps: Optional[str] = None
148+
# Is the field optional
149+
optional: Optional[bool] = False
148150

149151
@staticmethod
150152
def get(field: dataclasses.Field) -> "FieldMetadata":
@@ -165,7 +167,9 @@ def dataclass_field(
165167
return dataclasses.field(
166168
default=None if optional else PLACEHOLDER,
167169
metadata={
168-
"betterproto": FieldMetadata(number, proto_type, map_types, group, wraps)
170+
"betterproto": FieldMetadata(
171+
number, proto_type, map_types, group, wraps, optional
172+
)
169173
},
170174
)
171175

@@ -620,7 +624,8 @@ def __post_init__(self) -> None:
620624
if meta.group:
621625
group_current.setdefault(meta.group)
622626

623-
if self.__raw_get(field_name) != PLACEHOLDER:
627+
value = self.__raw_get(field_name)
628+
if value != PLACEHOLDER and not (meta.optional and value is None):
624629
# Found a non-sentinel value
625630
all_sentinel = False
626631

@@ -1043,7 +1048,6 @@ def to_dict(
10431048
defaults = self._betterproto.default_gen
10441049
for field_name, meta in self._betterproto.meta_by_field_name.items():
10451050
field_is_repeated = defaults[field_name] is list
1046-
field_is_optional = defaults[field_name] is type(None)
10471051
value = getattr(self, field_name)
10481052
cased_name = casing(field_name).rstrip("_") # type: ignore
10491053
if meta.proto_type == TYPE_MESSAGE:
@@ -1082,7 +1086,8 @@ def to_dict(
10821086
if value or include_default_values:
10831087
output[cased_name] = value
10841088
elif value is None:
1085-
output[cased_name] = None
1089+
if include_default_values:
1090+
output[cased_name] = value
10861091
elif (
10871092
value._serialized_on_wire
10881093
or include_default_values
@@ -1109,16 +1114,17 @@ def to_dict(
11091114
if field_is_repeated:
11101115
output[cased_name] = [str(n) for n in value]
11111116
elif value is None:
1112-
output[cased_name] = value
1117+
if include_default_values:
1118+
output[cased_name] = value
11131119
else:
11141120
output[cased_name] = str(value)
11151121
elif meta.proto_type == TYPE_BYTES:
11161122
if field_is_repeated:
11171123
output[cased_name] = [
11181124
b64encode(b).decode("utf8") for b in value
11191125
]
1120-
elif value is None:
1121-
output[cased_name] = None
1126+
elif value is None and include_default_values:
1127+
output[cased_name] = value
11221128
else:
11231129
output[cased_name] = b64encode(value).decode("utf8")
11241130
elif meta.proto_type == TYPE_ENUM:
@@ -1132,8 +1138,9 @@ def to_dict(
11321138
# transparently upgrade single value to repeated
11331139
output[cased_name] = [enum_class(value).name]
11341140
elif value is None:
1135-
output[cased_name] = None
1136-
elif field_is_optional:
1141+
if include_default_values:
1142+
output[cased_name] = value
1143+
elif meta.optional:
11371144
enum_class = field_types[field_name].__args__[0]
11381145
output[cased_name] = enum_class(value).name
11391146
else:
@@ -1173,9 +1180,6 @@ def from_dict(self: T, value: Dict[str, Any]) -> T:
11731180
if value[key] is not None:
11741181
if meta.proto_type == TYPE_MESSAGE:
11751182
v = getattr(self, field_name)
1176-
if value[key] is None and self._get_field_default(key) == None:
1177-
# Setting an optional value to None.
1178-
setattr(self, field_name, None)
11791183
if isinstance(v, list):
11801184
cls = self._betterproto.cls_by_field[field_name]
11811185
if cls == datetime:
Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1 @@
1-
{
2-
"test1": null,
3-
"test2": null,
4-
"test3": null,
5-
"test4": null,
6-
"test5": null,
7-
"test6": null,
8-
"test7": null,
9-
"test8": null
10-
}
1+
{}

tests/inputs/proto3_field_presence/proto3_field_presence_missing.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
"test2": false,
44
"test3": "",
55
"test4": "",
6-
"test5": null,
76
"test6": "A",
87
"test7": "0",
98
"test8": 0
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import json
2+
3+
from tests.output_betterproto.proto3_field_presence import Test, InnerTest, TestEnum
4+
5+
6+
def test_null_fields_json():
7+
"""Ensure that using "null" in JSON is equivalent to not specifying a
8+
field, for fields with explicit presence"""
9+
10+
def test_json(ref_json: str, obj_json: str) -> None:
11+
"""`ref_json` and `obj_json` are JSON strings describing a `Test` object.
12+
Test that deserializing both leads to the same object, and that
13+
`ref_json` is the normalized format."""
14+
ref_obj = Test().from_json(ref_json)
15+
obj = Test().from_json(obj_json)
16+
17+
assert obj == ref_obj
18+
assert json.loads(obj.to_json(0)) == json.loads(ref_json)
19+
20+
test_json("{}", '{ "test1": null, "test2": null, "test3": null }')
21+
test_json("{}", '{ "test4": null, "test5": null, "test6": null }')
22+
test_json("{}", '{ "test7": null, "test8": null }')
23+
test_json('{ "test5": {} }', '{ "test3": null, "test5": {} }')
24+
25+
# Make sure that if include_default_values is set, None values are
26+
# exported.
27+
obj = Test()
28+
assert obj.to_dict() == {}
29+
assert obj.to_dict(include_default_values=True) == {
30+
"test1": None,
31+
"test2": None,
32+
"test3": None,
33+
"test4": None,
34+
"test5": None,
35+
"test6": None,
36+
"test7": None,
37+
"test8": None,
38+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"nested": {}
3+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
syntax = "proto3";
2+
3+
message Test {
4+
oneof kind {
5+
Nested nested = 1;
6+
WithOptional with_optional = 2;
7+
}
8+
}
9+
10+
message InnerNested {
11+
optional bool a = 1;
12+
}
13+
14+
message Nested {
15+
InnerNested inner = 1;
16+
}
17+
18+
message WithOptional {
19+
optional bool b = 2;
20+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from tests.output_betterproto.proto3_field_presence_oneof import (
2+
Test,
3+
InnerNested,
4+
Nested,
5+
WithOptional,
6+
)
7+
8+
9+
def test_serialization():
10+
"""Ensure that serialization of fields unset but with explicit field
11+
presence do not bloat the serialized payload with length-delimited fields
12+
with length 0"""
13+
14+
def test_empty_nested(message: Test) -> None:
15+
# '0a' => tag 1, length delimited
16+
# '00' => length: 0
17+
assert bytes(message) == bytearray.fromhex("0a 00")
18+
19+
test_empty_nested(Test(nested=Nested()))
20+
test_empty_nested(Test(nested=Nested(inner=None)))
21+
test_empty_nested(Test(nested=Nested(inner=InnerNested(a=None))))
22+
23+
def test_empty_with_optional(message: Test) -> None:
24+
# '12' => tag 2, length delimited
25+
# '00' => length: 0
26+
assert bytes(message) == bytearray.fromhex("12 00")
27+
28+
test_empty_with_optional(Test(with_optional=WithOptional()))
29+
test_empty_with_optional(Test(with_optional=WithOptional(b=None)))

0 commit comments

Comments
 (0)