Skip to content

Commit e21672b

Browse files
ffi: Add support for serializing/deserializing auto-generated and user-generated schema tree node IDs. (#557)
Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com>
1 parent de2cf07 commit e21672b

File tree

9 files changed

+337
-98
lines changed

9 files changed

+337
-98
lines changed

components/core/src/clp/ffi/ir_stream/Serializer.cpp

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -403,15 +403,16 @@ auto Serializer<encoded_variable_t>::serialize_schema_tree_node(
403403
return false;
404404
}
405405

406-
auto const parent_id{locator.get_parent_id()};
407-
if (parent_id <= UINT8_MAX) {
408-
m_schema_tree_node_buf.push_back(cProtocol::Payload::SchemaTreeNodeParentIdUByte);
409-
m_schema_tree_node_buf.push_back(bit_cast<int8_t>(static_cast<uint8_t>(parent_id)));
410-
} else if (parent_id <= UINT16_MAX) {
411-
m_schema_tree_node_buf.push_back(cProtocol::Payload::SchemaTreeNodeParentIdUShort);
412-
serialize_int(static_cast<uint16_t>(parent_id), m_schema_tree_node_buf);
413-
} else {
414-
// Out of range
406+
if (false
407+
== encode_and_serialize_schema_tree_node_id<
408+
false,
409+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdByte,
410+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdShort,
411+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdInt>(
412+
locator.get_parent_id(),
413+
m_schema_tree_node_buf
414+
))
415+
{
415416
return false;
416417
}
417418

@@ -420,16 +421,11 @@ auto Serializer<encoded_variable_t>::serialize_schema_tree_node(
420421

421422
template <typename encoded_variable_t>
422423
auto Serializer<encoded_variable_t>::serialize_key(SchemaTree::Node::id_t id) -> bool {
423-
if (id <= UINT8_MAX) {
424-
m_key_group_buf.push_back(cProtocol::Payload::KeyIdUByte);
425-
m_key_group_buf.push_back(bit_cast<int8_t>(static_cast<uint8_t>(id)));
426-
} else if (id <= UINT16_MAX) {
427-
m_key_group_buf.push_back(cProtocol::Payload::KeyIdUShort);
428-
serialize_int(static_cast<uint16_t>(id), m_key_group_buf);
429-
} else {
430-
return false;
431-
}
432-
return true;
424+
return encode_and_serialize_schema_tree_node_id<
425+
false,
426+
cProtocol::Payload::EncodedSchemaTreeNodeIdByte,
427+
cProtocol::Payload::EncodedSchemaTreeNodeIdShort,
428+
cProtocol::Payload::EncodedSchemaTreeNodeIdInt>(id, m_key_group_buf);
433429
}
434430

435431
template <typename encoded_variable_t>

components/core/src/clp/ffi/ir_stream/Serializer.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,7 @@ class Serializer {
103103
/**
104104
* Serializes the given key ID into `m_key_group_buf`.
105105
* @param id
106-
* @return true on success.
107-
* @return false if the ID exceeds the representable range.
106+
* @return Forwards `encode_and_serialize_schema_tree_node_id`'s return values.
108107
*/
109108
[[nodiscard]] auto serialize_key(SchemaTree::Node::id_t id) -> bool;
110109

components/core/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.cpp

Lines changed: 77 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <memory>
66
#include <optional>
77
#include <string>
8+
#include <system_error>
89
#include <type_traits>
910
#include <utility>
1011
#include <vector>
@@ -43,16 +44,16 @@ using Schema = std::vector<SchemaTree::Node::id_t>;
4344
/**
4445
* Deserializes the parent ID of a schema tree node.
4546
* @param reader
46-
* @param parent_id Returns the deserialized result.
47-
* @return IRErrorCode::IRErrorCode_Success on success.
48-
* @return IRErrorCode::IRErrorCode_Incomplete_IR if the stream is truncated.
49-
* @return IRErrorCode::IRErrorCode_Corrupted_IR if the next packet in the stream isn't a parent ID.
50-
* @return Forwards `deserialize_tag`'s return values on any other failure.
47+
* @return A result containing a pair or an error code indicating the failure:
48+
* - The pair:
49+
* - Whether the node ID is for an auto-generated node.
50+
* - The decoded node ID.
51+
* - The possible error codes:
52+
* - Forwards `deserialize_tag`'s return values.
53+
* @return Forwards `deserialize_and_decode_schema_tree_node_id`'s return values.
5154
*/
52-
[[nodiscard]] auto deserialize_schema_tree_node_parent_id(
53-
ReaderInterface& reader,
54-
SchemaTree::Node::id_t& parent_id
55-
) -> IRErrorCode;
55+
[[nodiscard]] auto deserialize_schema_tree_node_parent_id(ReaderInterface& reader
56+
) -> OUTCOME_V2_NAMESPACE::std_result<std::pair<bool, SchemaTree::Node::id_t>>;
5657

5758
/**
5859
* Deserializes the key name of a schema tree node.
@@ -100,13 +101,14 @@ deserialize_int_val(ReaderInterface& reader, encoded_tag_t tag, value_int_t& val
100101
* Deserializes the IDs of all keys in a log event.
101102
* @param reader
102103
* @param tag Takes the current tag as input and returns the last tag read.
103-
* @param schema Returns the deserialized schema.
104-
* @return IRErrorCode::IRErrorCode_Success on success.
105-
* @return IRErrorCode::IRErrorCode_Incomplete_IR if the stream is truncated.
106-
* @return Forwards `deserialize_tag`'s return values on any other failure.
104+
* @return A result containing the deserialized schema or an error code indicating the failure:
105+
* - std::errc::protocol_not_supported if the IR stream contains auto-generated keys (TODO: Remove
106+
* this once auto-generated keys are fully supported).
107+
* - Forwards `deserialize_tag`'s return values.
108+
* - Forwards `deserialize_and_decode_schema_tree_node_id`'s return values.
107109
*/
108-
[[nodiscard]] auto
109-
deserialize_schema(ReaderInterface& reader, encoded_tag_t& tag, Schema& schema) -> IRErrorCode;
110+
[[nodiscard]] auto deserialize_schema(ReaderInterface& reader, encoded_tag_t& tag)
111+
-> OUTCOME_V2_NAMESPACE::std_result<Schema>;
110112

111113
/**
112114
* Deserializes the next value and pushes the result into `node_id_value_pairs`.
@@ -170,10 +172,17 @@ requires(std::is_same_v<ir::four_byte_encoded_variable_t, encoded_variable_t>
170172
) -> IRErrorCode;
171173

172174
/**
175+
* @param tag
173176
* @return Whether the given tag can be a valid leading tag of a log event IR unit.
174177
*/
175178
[[nodiscard]] auto is_log_event_ir_unit_tag(encoded_tag_t tag) -> bool;
176179

180+
/**
181+
* @param tag
182+
* @return Whether the given tag represents a valid encoded key ID.
183+
*/
184+
[[nodiscard]] auto is_encoded_key_id_tag(encoded_tag_t tag) -> bool;
185+
177186
auto schema_tree_node_tag_to_type(encoded_tag_t tag) -> std::optional<SchemaTree::Node::Type> {
178187
switch (tag) {
179188
case cProtocol::Payload::SchemaTreeNodeInt:
@@ -193,30 +202,16 @@ auto schema_tree_node_tag_to_type(encoded_tag_t tag) -> std::optional<SchemaTree
193202
}
194203
}
195204

196-
auto deserialize_schema_tree_node_parent_id(
197-
ReaderInterface& reader,
198-
SchemaTree::Node::id_t& parent_id
199-
) -> IRErrorCode {
205+
auto deserialize_schema_tree_node_parent_id(ReaderInterface& reader
206+
) -> OUTCOME_V2_NAMESPACE::std_result<std::pair<bool, SchemaTree::Node::id_t>> {
200207
encoded_tag_t tag{};
201208
if (auto const err{deserialize_tag(reader, tag)}; IRErrorCode::IRErrorCode_Success != err) {
202-
return err;
203-
}
204-
if (cProtocol::Payload::SchemaTreeNodeParentIdUByte == tag) {
205-
uint8_t deserialized_id{};
206-
if (false == deserialize_int(reader, deserialized_id)) {
207-
return IRErrorCode::IRErrorCode_Incomplete_IR;
208-
}
209-
parent_id = static_cast<SchemaTree::Node::id_t>(deserialized_id);
210-
} else if (cProtocol::Payload::SchemaTreeNodeParentIdUShort == tag) {
211-
uint16_t deserialized_id{};
212-
if (false == deserialize_int(reader, deserialized_id)) {
213-
return IRErrorCode::IRErrorCode_Incomplete_IR;
214-
}
215-
parent_id = static_cast<SchemaTree::Node::id_t>(deserialized_id);
216-
} else {
217-
return IRErrorCode::IRErrorCode_Corrupted_IR;
209+
return ir_error_code_to_errc(err);
218210
}
219-
return IRErrorCode_Success;
211+
return deserialize_and_decode_schema_tree_node_id<
212+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdByte,
213+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdShort,
214+
cProtocol::Payload::EncodedSchemaTreeNodeParentIdInt>(tag, reader);
220215
}
221216

222217
auto deserialize_schema_tree_node_key_name(ReaderInterface& reader, std::string& key_name)
@@ -297,32 +292,35 @@ auto deserialize_string(ReaderInterface& reader, encoded_tag_t tag, std::string&
297292
return IRErrorCode::IRErrorCode_Success;
298293
}
299294

300-
auto deserialize_schema(ReaderInterface& reader, encoded_tag_t& tag, Schema& schema)
301-
-> IRErrorCode {
302-
schema.clear();
295+
auto deserialize_schema(ReaderInterface& reader, encoded_tag_t& tag)
296+
-> OUTCOME_V2_NAMESPACE::std_result<Schema> {
297+
Schema schema;
303298
while (true) {
304-
if (cProtocol::Payload::KeyIdUByte == tag) {
305-
uint8_t id{};
306-
if (false == deserialize_int(reader, id)) {
307-
return IRErrorCode::IRErrorCode_Incomplete_IR;
308-
}
309-
schema.push_back(static_cast<SchemaTree::Node::id_t>(id));
310-
} else if (cProtocol::Payload::KeyIdUShort == tag) {
311-
uint16_t id{};
312-
if (false == deserialize_int(reader, id)) {
313-
return IRErrorCode::IRErrorCode_Incomplete_IR;
314-
}
315-
schema.push_back(static_cast<SchemaTree::Node::id_t>(id));
316-
} else {
299+
if (false == is_encoded_key_id_tag(tag)) {
300+
// Not a key ID tag: the schema is complete (or, if no key was read, the log event is empty).
317301
break;
318302
}
319303

304+
auto const schema_tree_node_id_result{deserialize_and_decode_schema_tree_node_id<
305+
cProtocol::Payload::EncodedSchemaTreeNodeIdByte,
306+
cProtocol::Payload::EncodedSchemaTreeNodeIdShort,
307+
cProtocol::Payload::EncodedSchemaTreeNodeIdInt>(tag, reader)};
308+
if (schema_tree_node_id_result.has_error()) {
309+
return schema_tree_node_id_result.error();
310+
}
311+
auto const [is_auto_generated, node_id]{schema_tree_node_id_result.value()};
312+
if (is_auto_generated) {
313+
// Currently, we don't support auto-generated keys.
314+
return std::errc::protocol_not_supported;
315+
}
316+
schema.push_back(node_id);
317+
320318
if (auto const err{deserialize_tag(reader, tag)}; IRErrorCode::IRErrorCode_Success != err) {
321-
return err;
319+
return ir_error_code_to_errc(err);
322320
}
323321
}
324322

325-
return IRErrorCode::IRErrorCode_Success;
323+
return schema;
326324
}
327325

328326
auto deserialize_value_and_insert_to_node_id_value_pairs(
@@ -469,12 +467,24 @@ auto is_log_event_ir_unit_tag(encoded_tag_t tag) -> bool {
469467
// The log event is an empty object
470468
return true;
471469
}
472-
if (cProtocol::Payload::KeyIdUByte == tag || cProtocol::Payload::KeyIdUShort == tag) {
470+
if (is_encoded_key_id_tag(tag)) {
473471
// If not empty, the log event must start with a tag byte indicating the key ID
474472
return true;
475473
}
476474
return false;
477475
}
476+
477+
auto is_encoded_key_id_tag(encoded_tag_t tag) -> bool {
478+
// Ideally, we could check whether the tag is within the range of
479+
// [EncodedSchemaTreeNodeIdByte, EncodedSchemaTreeNodeIdInt], but we don't for two reasons:
480+
// - We optimize for streams that have few key IDs, meaning we can short circuit in the first
481+
// branch below.
482+
// - Using a range check assumes all length indicators are defined continuously, in order, but
483+
// we don't have static checks for this assumption.
484+
return cProtocol::Payload::EncodedSchemaTreeNodeIdByte == tag
485+
|| cProtocol::Payload::EncodedSchemaTreeNodeIdShort == tag
486+
|| cProtocol::Payload::EncodedSchemaTreeNodeIdInt == tag;
487+
}
478488
} // namespace
479489

480490
auto get_ir_unit_type_from_tag(encoded_tag_t tag) -> std::optional<IrUnitType> {
@@ -508,11 +518,14 @@ auto deserialize_ir_unit_schema_tree_node_insertion(
508518
return ir_error_code_to_errc(IRErrorCode::IRErrorCode_Corrupted_IR);
509519
}
510520

511-
SchemaTree::Node::id_t parent_id{};
512-
if (auto const err{deserialize_schema_tree_node_parent_id(reader, parent_id)};
513-
IRErrorCode_Success != err)
514-
{
515-
return ir_error_code_to_errc(err);
521+
auto const parent_node_id_result{deserialize_schema_tree_node_parent_id(reader)};
522+
if (parent_node_id_result.has_error()) {
523+
return parent_node_id_result.error();
524+
}
525+
auto const [is_auto_generated, parent_id]{parent_node_id_result.value()};
526+
if (is_auto_generated) {
527+
// Currently, we don't support auto-generated keys.
528+
return std::errc::protocol_not_supported;
516529
}
517530

518531
if (auto const err{deserialize_schema_tree_node_key_name(reader, key_name)};
@@ -541,12 +554,11 @@ auto deserialize_ir_unit_kv_pair_log_event(
541554
std::shared_ptr<SchemaTree> schema_tree,
542555
UtcOffset utc_offset
543556
) -> OUTCOME_V2_NAMESPACE::std_result<KeyValuePairLogEvent> {
544-
Schema schema;
545-
if (auto const err{deserialize_schema(reader, tag, schema)};
546-
IRErrorCode::IRErrorCode_Success != err)
547-
{
548-
return ir_error_code_to_errc(err);
557+
auto const schema_result{deserialize_schema(reader, tag)};
558+
if (schema_result.has_error()) {
559+
return schema_result.error();
549560
}
561+
auto const& schema{schema_result.value()};
550562

551563
KeyValuePairLogEvent::NodeIdValuePairs node_id_value_pairs;
552564
if (false == schema.empty()) {

components/core/src/clp/ffi/ir_stream/ir_unit_deserialization_methods.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ namespace clp::ffi::ir_stream {
3232
* indicating the failure:
3333
* - std::errc::result_out_of_range if the IR stream is truncated.
3434
* - std::errc::protocol_error if the deserialized node type isn't supported.
35+
* - std::errc::protocol_not_supported if the IR stream contains auto-generated keys (TODO: Remove
36+
* this once auto-generated keys are fully supported).
3537
* - Forwards `deserialize_schema_tree_node_key_name`'s return values.
3638
* - Forwards `deserialize_schema_tree_node_parent_id`'s return values.
3739
*/
@@ -63,6 +65,7 @@ namespace clp::ffi::ir_stream {
6365
* - std::errc::protocol_error if the IR stream is corrupted.
6466
* - std::errc::protocol_not_supported if the IR stream contains an unsupported metadata format
6567
* or uses an unsupported version.
68+
* - Forwards `deserialize_schema`'s return values.
6669
* - Forwards `KeyValuePairLogEvent::create`'s return values if the intermediate deserialized result
6770
* cannot construct a valid key-value pair log event.
6871
*/

components/core/src/clp/ffi/ir_stream/protocol_constants.hpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ constexpr int8_t LengthUShort = 0x12;
1313

1414
constexpr char VersionKey[] = "VERSION";
1515
constexpr char VersionValue[] = "0.0.2";
16-
constexpr char BetaVersionValue[] = "0.1.0-beta";
16+
constexpr char BetaVersionValue[] = "0.1.0-beta.1";
1717

1818
// The following regex can be used to validate a Semantic Versioning string. The source of the
1919
// regex can be found here: https://semver.org/
@@ -67,11 +67,13 @@ constexpr int8_t ValueEightByteEncodingClpStr = 0x5A;
6767
constexpr int8_t ValueEmpty = 0x5E;
6868
constexpr int8_t ValueNull = 0x5F;
6969

70-
constexpr int8_t SchemaTreeNodeParentIdUByte = 0x60;
71-
constexpr int8_t SchemaTreeNodeParentIdUShort = 0x61;
70+
constexpr int8_t EncodedSchemaTreeNodeParentIdByte = 0x60;
71+
constexpr int8_t EncodedSchemaTreeNodeParentIdShort = 0x61;
72+
constexpr int8_t EncodedSchemaTreeNodeParentIdInt = 0x62;
7273

73-
constexpr int8_t KeyIdUByte = 0x65;
74-
constexpr int8_t KeyIdUShort = 0x66;
74+
constexpr int8_t EncodedSchemaTreeNodeIdByte = 0x65;
75+
constexpr int8_t EncodedSchemaTreeNodeIdShort = 0x66;
76+
constexpr int8_t EncodedSchemaTreeNodeIdInt = 0x67;
7577

7678
constexpr int8_t SchemaTreeNodeMask = 0x70;
7779

components/core/src/clp/ffi/ir_stream/utils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ auto serialize_string(std::string_view str, std::vector<int8_t>& output_buf) ->
5454

5555
auto ir_error_code_to_errc(IRErrorCode ir_error_code) -> std::errc {
5656
switch (ir_error_code) {
57+
case IRErrorCode_Success:
58+
return {};
5759
case IRErrorCode_Incomplete_IR:
5860
return std::errc::result_out_of_range;
5961
case IRErrorCode_Corrupted_IR:

0 commit comments

Comments
 (0)