-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-43911: [C++] Compute Row: ListKeyEncoder Supports #43912
base: main
Are you sure you want to change the base?
Changes from 1 commit
f5f0f33
7ed5df4
9aaff83
5055ef0
222909a
4e8325e
72705a9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,6 +269,28 @@ struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { | |
} | ||
}; | ||
|
||
/// KeyEncoder implementation for list-typed key columns. | ||
/// | ||
/// Per the "List Type" notes in the RowEncoder documentation, a list value is | ||
/// encoded as [null byte, list element count (4 bytes), [element 1, element 2, ...]]. | ||
/// Those notes state that only primitive and dictionary element types are | ||
/// supported and that nested lists are not. | ||
struct ARROW_EXPORT ListKeyEncoder : KeyEncoder { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wonder should I put this into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, please do. It would be nice to hide most contents from this file into the corresponding There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a comment explaining how the encoding looks like? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see you added a comment below. |
||
/// Constructs an encoder from the list's element type and a KeyEncoder for it. | ||
/// NOTE(review): element_encoder is presumably used to encode each list element; | ||
/// the definition is not visible in this header section -- confirm. | ||
explicit ListKeyEncoder(std::shared_ptr<DataType> element_type, std::shared_ptr<KeyEncoder> element_encoder); | ||
|
||
// The member functions below override the KeyEncoder interface; their | ||
// definitions are not part of this header section. | ||
void AddLength(const ExecValue&, int64_t batch_length, int32_t* lengths) override; | ||
|
||
void AddLengthNull(int32_t* length) override; | ||
|
||
Status Encode(const ExecValue& data, int64_t batch_length, | ||
uint8_t** encoded_bytes) override; | ||
|
||
void EncodeNull(uint8_t** encoded_bytes) override; | ||
|
||
Result<std::shared_ptr<ArrayData>> Decode(uint8_t** encoded_bytes, int32_t length, | ||
MemoryPool* pool) override; | ||
|
||
// Data type of the list's elements, as passed to the constructor | ||
// (presumably; the constructor body is not shown here). | ||
std::shared_ptr<DataType> element_type_; | ||
// Encoder for the list's elements, as passed to the constructor | ||
// (presumably; the constructor body is not shown here). | ||
std::shared_ptr<KeyEncoder> element_encoder_; | ||
// extension_type_ is used to store the extension type of the list element. | ||
// It would be nullptr if the list element is not an extension type. | ||
std::shared_ptr<ExtensionType> extension_type_; | ||
}; | ||
|
||
/// RowEncoder encodes ExecSpan to a variable length byte sequence | ||
/// created by concatenating the encoded form of each column. The encoding | ||
/// for each column depends on its data type. | ||
|
@@ -328,14 +350,23 @@ struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { | |
/// A null string would be encoded as: | ||
/// 1 ( 1 byte for null) + 0 ( 4 bytes for length ) | ||
/// | ||
/// ## List Type | ||
/// | ||
/// List Type is encoded as: | ||
/// [null byte, list element count, [element 1, element 2, ...]] | ||
/// Element count uses 4 bytes. | ||
/// | ||
/// Currently, only lists whose elements are of primitive or dictionary type | ||
/// can be encoded; nested lists are not supported. | ||
/// | ||
/// # Row Encoding | ||
/// | ||
/// The row format is the concatenation of the encodings of each column. | ||
class ARROW_EXPORT RowEncoder { | ||
public: | ||
static constexpr int kRowIdForNulls() { return -1; } | ||
|
||
void Init(const std::vector<TypeHolder>& column_types, ExecContext* ctx); | ||
Status Init(const std::vector<TypeHolder>& column_types, ExecContext* ctx); | ||
void Clear(); | ||
Status EncodeAndAppend(const ExecSpan& batch); | ||
Result<ExecBatch> Decode(int64_t num_rows, const int32_t* row_ids); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can also return
unique_ptr
here. I didn't see the purpose of a shared_ptr being used. Also, this function is extracted from
RowEncoder