From d37dd4c0ffcd911b1fa817369c19825e61e43b02 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Wed, 7 Feb 2024 09:58:09 -0800 Subject: [PATCH] Rename VectorSerializer to IterativeVectorSerializer to distinguish it from BatchVectorSerializer (#8605) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/8605 To make the differences between VectorSerializer and BatchVectorSerializer more explicit, I'm renaming VectorSerializer to IterativeVectorSerializer. I also rename createSerializer to createIterativeSerializer, however, since Presto uses createSerializer, I've left it in. Once this diff lands, I'll land a change in Presto to use createIterativeSerializer, and then land a change back in Velox to remove createSerializer. Reviewed By: bikramSingh91 Differential Revision: D53201916 fbshipit-source-id: 69947b4bf7a50f43fd27126b3b29688e9d8300ef --- velox/serializers/CompactRowSerializer.cpp | 5 +++-- velox/serializers/CompactRowSerializer.h | 2 +- velox/serializers/PrestoSerializer.cpp | 13 ++++++------ velox/serializers/PrestoSerializer.h | 11 +++++----- velox/serializers/UnsafeRowSerializer.cpp | 5 +++-- velox/serializers/UnsafeRowSerializer.h | 2 +- .../tests/CompactRowSerializerTest.cpp | 3 ++- .../tests/PrestoSerializerTest.cpp | 4 ++-- .../tests/UnsafeRowSerializerTest.cpp | 3 ++- velox/vector/VectorStream.cpp | 6 +++--- velox/vector/VectorStream.h | 20 +++++++++++++++---- velox/vector/tests/VectorStreamTest.cpp | 2 +- 12 files changed, 47 insertions(+), 29 deletions(-) diff --git a/velox/serializers/CompactRowSerializer.cpp b/velox/serializers/CompactRowSerializer.cpp index a47be6c7e742..b44f804995ef 100644 --- a/velox/serializers/CompactRowSerializer.cpp +++ b/velox/serializers/CompactRowSerializer.cpp @@ -28,7 +28,7 @@ void CompactRowVectorSerde::estimateSerializedSize( } namespace { -class CompactRowVectorSerializer : public VectorSerializer { +class CompactRowVectorSerializer : public IterativeVectorSerializer { public: using TRowSize = uint32_t; @@ -120,7 +120,8 @@ std::string concatenatePartialRow( } // namespace -std::unique_ptr CompactRowVectorSerde::createSerializer( +std::unique_ptr +CompactRowVectorSerde::createIterativeSerializer( RowTypePtr /* type */, int32_t /* numRows */, StreamArena* streamArena, diff --git a/velox/serializers/CompactRowSerializer.h b/velox/serializers/CompactRowSerializer.h index 10c5a928cb49..8b6f1a998450 100644 --- a/velox/serializers/CompactRowSerializer.h +++ b/velox/serializers/CompactRowSerializer.h @@ -33,7 +33,7 @@ class CompactRowVectorSerde : public VectorSerde { // This method is not used in production code. It is only used to // support round-trip tests for deserialization. - std::unique_ptr createSerializer( + std::unique_ptr createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena, diff --git a/velox/serializers/PrestoSerializer.cpp b/velox/serializers/PrestoSerializer.cpp index ef8b5ceed5a5..149a70948fc5 100644 --- a/velox/serializers/PrestoSerializer.cpp +++ b/velox/serializers/PrestoSerializer.cpp @@ -3316,9 +3316,9 @@ class PrestoBatchVectorSerializer : public BatchVectorSerializer { const std::unique_ptr codec_; }; -class PrestoVectorSerializer : public VectorSerializer { +class PrestoIterativeVectorSerializer : public IterativeVectorSerializer { public: - PrestoVectorSerializer( + PrestoIterativeVectorSerializer( const RowTypePtr& rowType, std::vector encodings, int32_t numRows, @@ -3349,7 +3349,7 @@ class PrestoVectorSerializer : public VectorSerializer { // Constructor that takes a row vector instead of only the types. This is // different because then we know exactly how each vector is encoded // (recursively). - PrestoVectorSerializer( + PrestoIterativeVectorSerializer( const RowVectorPtr& rowVector, StreamArena* streamArena, bool useLosslessTimestamp, @@ -3455,13 +3455,14 @@ void PrestoVectorSerde::estimateSerializedSize( estimateSerializedSizeInt(vector->loadedVector(), rows, sizes, scratch); } -std::unique_ptr PrestoVectorSerde::createSerializer( +std::unique_ptr +PrestoVectorSerde::createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena, const Options* options) { const auto prestoOptions = toPrestoOptions(options); - return std::make_unique( + return std::make_unique( type, prestoOptions.encodings, numRows, @@ -3484,7 +3485,7 @@ void PrestoVectorSerde::deprecatedSerializeEncoded( const Options* options, OutputStream* out) { auto prestoOptions = toPrestoOptions(options); - auto serializer = std::make_unique( + auto serializer = std::make_unique( vector, streamArena, prestoOptions.useLosslessTimestamp, diff --git a/velox/serializers/PrestoSerializer.h b/velox/serializers/PrestoSerializer.h index 0c4985e93ba4..a957e27ce76e 100644 --- a/velox/serializers/PrestoSerializer.h +++ b/velox/serializers/PrestoSerializer.h @@ -24,10 +24,11 @@ namespace facebook::velox::serializer::presto { /// There are two ways to serialize data using PrestoVectorSerde: /// /// 1. In order to append multiple RowVectors into the same serialized payload, -/// one can first create a VectorSerializer using createSerializer(), then -/// append successive RowVectors using VectorSerializer::append(). In this case, -/// since different RowVector might encode columns differently, data is always -/// flattened in the serialized payload. +/// one can first create an IterativeVectorSerializer using +/// createIterativeSerializer(), then append successive RowVectors using +/// IterativeVectorSerializer::append(). In this case, since different RowVector +/// might encode columns differently, data is always flattened in the serialized +/// payload. /// /// Note that there are two flavors of append(), one that takes a range of rows, /// and one that takes a list of row ids. The former is useful when serializing @@ -76,7 +77,7 @@ class PrestoVectorSerde : public VectorSerde { vector_size_t** sizes, Scratch& scratch) override; - std::unique_ptr createSerializer( + std::unique_ptr createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena, diff --git a/velox/serializers/UnsafeRowSerializer.cpp b/velox/serializers/UnsafeRowSerializer.cpp index 342311c5d926..0d940000fd70 100644 --- a/velox/serializers/UnsafeRowSerializer.cpp +++ b/velox/serializers/UnsafeRowSerializer.cpp @@ -29,7 +29,7 @@ void UnsafeRowVectorSerde::estimateSerializedSize( } namespace { -class UnsafeRowVectorSerializer : public VectorSerializer { +class UnsafeRowVectorSerializer : public IterativeVectorSerializer { public: using TRowSize = uint32_t; @@ -122,7 +122,8 @@ std::string concatenatePartialRow( } // namespace -std::unique_ptr UnsafeRowVectorSerde::createSerializer( +std::unique_ptr +UnsafeRowVectorSerde::createIterativeSerializer( RowTypePtr /* type */, int32_t /* numRows */, StreamArena* streamArena, diff --git a/velox/serializers/UnsafeRowSerializer.h b/velox/serializers/UnsafeRowSerializer.h index 1bf41c4f0cae..1c793c98717e 100644 --- a/velox/serializers/UnsafeRowSerializer.h +++ b/velox/serializers/UnsafeRowSerializer.h @@ -31,7 +31,7 @@ class UnsafeRowVectorSerde : public VectorSerde { // This method is not used in production code. It is only used to // support round-trip tests for deserialization. - std::unique_ptr createSerializer( + std::unique_ptr createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena, diff --git a/velox/serializers/tests/CompactRowSerializerTest.cpp b/velox/serializers/tests/CompactRowSerializerTest.cpp index a556c1e538ab..50fcd8d2c91f 100644 --- a/velox/serializers/tests/CompactRowSerializerTest.cpp +++ b/velox/serializers/tests/CompactRowSerializerTest.cpp @@ -43,7 +43,8 @@ class CompactRowSerializerTest : public ::testing::Test, auto arena = std::make_unique(pool_.get()); auto rowType = asRowType(rowVector->type()); - auto serializer = serde_->createSerializer(rowType, numRows, arena.get()); + auto serializer = + serde_->createIterativeSerializer(rowType, numRows, arena.get()); Scratch scratch; serializer->append(rowVector, folly::Range(rows.data(), numRows), scratch); diff --git a/velox/serializers/tests/PrestoSerializerTest.cpp b/velox/serializers/tests/PrestoSerializerTest.cpp index ed5710939bc3..6c75a67d6c3a 100644 --- a/velox/serializers/tests/PrestoSerializerTest.cpp +++ b/velox/serializers/tests/PrestoSerializerTest.cpp @@ -92,8 +92,8 @@ class PrestoSerializerTest auto rowType = asRowType(rowVector->type()); auto numRows = rowVector->size(); auto paramOptions = getParamSerdeOptions(serdeOptions); - auto serializer = - serde_->createSerializer(rowType, numRows, arena.get(), ¶mOptions); + auto serializer = serde_->createIterativeSerializer( + rowType, numRows, arena.get(), ¶mOptions); vector_size_t sizeEstimate = 0; Scratch scratch; diff --git a/velox/serializers/tests/UnsafeRowSerializerTest.cpp b/velox/serializers/tests/UnsafeRowSerializerTest.cpp index a8cc0d4c2773..ce6ad23aa1f5 100644 --- a/velox/serializers/tests/UnsafeRowSerializerTest.cpp +++ b/velox/serializers/tests/UnsafeRowSerializerTest.cpp @@ -43,7 +43,8 @@ class UnsafeRowSerializerTest : public ::testing::Test, auto arena = std::make_unique(pool_.get()); auto rowType = std::dynamic_pointer_cast(rowVector->type()); - auto serializer = serde_->createSerializer(rowType, numRows, arena.get()); + auto serializer = + serde_->createIterativeSerializer(rowType, numRows, arena.get()); Scratch scratch; serializer->append(rowVector, folly::Range(rows.data(), numRows), scratch); diff --git a/velox/vector/VectorStream.cpp b/velox/vector/VectorStream.cpp index 758874c11239..069f2472f894 100644 --- a/velox/vector/VectorStream.cpp +++ b/velox/vector/VectorStream.cpp @@ -41,7 +41,7 @@ class DefaultBatchVectorSerializer : public BatchVectorSerializer { } StreamArena arena(pool_); - auto serializer = serde_->createSerializer( + auto serializer = serde_->createIterativeSerializer( asRowType(vector->type()), numRows, &arena, options_); serializer->append(vector, ranges, scratch); serializer->flush(stream); @@ -67,7 +67,7 @@ getNamedVectorSerdeImpl() { } // namespace -void VectorSerializer::append(const RowVectorPtr& vector) { +void IterativeVectorSerializer::append(const RowVectorPtr& vector) { const IndexRange allRows{0, vector->size()}; Scratch scratch; append(vector, folly::Range(&allRows, 1), scratch); @@ -144,7 +144,7 @@ void VectorStreamGroup::createStreamTree( RowTypePtr type, int32_t numRows, const VectorSerde::Options* options) { - serializer_ = serde_->createSerializer(type, numRows, this, options); + serializer_ = serde_->createIterativeSerializer(type, numRows, this, options); } void VectorStreamGroup::append( diff --git a/velox/vector/VectorStream.h b/velox/vector/VectorStream.h index ef9a38eb8094..6590532b03b1 100644 --- a/velox/vector/VectorStream.h +++ b/velox/vector/VectorStream.h @@ -39,9 +39,9 @@ struct IndexRange { /// Uses successive calls to `append` to add more rows to the serialization /// buffer. Then call `flush` to write the aggregate serialized data to an /// OutputStream. -class VectorSerializer { +class IterativeVectorSerializer { public: - virtual ~VectorSerializer() = default; + virtual ~IterativeVectorSerializer() = default; /// Serialize a subset of rows in a vector. virtual void append( @@ -159,7 +159,19 @@ class VectorSerde { /// /// This is more appropriate if the use case involves many small writes, e.g. /// partitioning a RowVector across multiple destinations. - virtual std::unique_ptr createSerializer( + /// + /// TODO: Remove createSerializer once Presto is updated to call + /// createIterativeSerializer. + virtual std::unique_ptr createSerializer( + RowTypePtr type, + int32_t numRows, + StreamArena* streamArena, + const Options* options = nullptr) { + return createIterativeSerializer( + std::move(type), numRows, streamArena, options); + } + + virtual std::unique_ptr createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena, @@ -298,7 +310,7 @@ class VectorStreamGroup : public StreamArena { const VectorSerde::Options* options = nullptr); private: - std::unique_ptr serializer_; + std::unique_ptr serializer_; VectorSerde* serde_{nullptr}; }; diff --git a/velox/vector/tests/VectorStreamTest.cpp b/velox/vector/tests/VectorStreamTest.cpp index ecc9f8cda2e0..a5b6707163fd 100644 --- a/velox/vector/tests/VectorStreamTest.cpp +++ b/velox/vector/tests/VectorStreamTest.cpp @@ -25,7 +25,7 @@ class MockVectorSerde : public VectorSerde { const folly::Range& ranges, vector_size_t** sizes) override {} - std::unique_ptr createSerializer( + std::unique_ptr createIterativeSerializer( RowTypePtr type, int32_t numRows, StreamArena* streamArena,