11#include " process_columns.h"
22#include " common/adapter.h"
3+ #include " modifier/subset.h"
34
45#include < util/string/join.h>
56
@@ -28,16 +29,23 @@ std::shared_ptr<TDataContainer> ExtractColumnsValidateImpl(const std::shared_ptr
2829
// AdaptColumnsImpl: selects columns of srcBatch by the field names of dstSchema and
// builds a new container from them via NAdapter::TDataBuilderPolicy<TDataContainer>::Build.
//
// This listing is a diff rendering: lines marked with - are the previous version,
// lines marked with + are the new version, and the @@ line marks a hunk boundary
// where some original lines are not shown.
//
// Parameters (new version):
//   srcBatch  - incoming container; checked with AFL_VERIFY.
//   dstSchema - target schema whose fields drive the selection; checked with AFL_VERIFY.
//   subset    - optional output pointer. When non-null, destination fields missing from
//               srcBatch are skipped instead of reported, and *subset is assigned after the loop.
// Returns: the built container, or TConclusionStatus::Fail with a message on the
//          visible failure paths (incompatible column types / not found column).
// Note: *subset is only written after the loop, so the visible early Fail returns
//       leave it unmodified.
2930template <class TDataContainer >
3031TConclusion<std::shared_ptr<TDataContainer>> AdaptColumnsImpl (const std::shared_ptr<TDataContainer>& srcBatch,
31- const std::shared_ptr<arrow::Schema>& dstSchema) {
32+ const std::shared_ptr<arrow::Schema>& dstSchema, TSchemaSubset* subset ) {
3233 AFL_VERIFY (srcBatch);
3334 AFL_VERIFY (dstSchema);
3435 std::vector<std::shared_ptr<typename NAdapter::TDataBuilderPolicy<TDataContainer>::TColumn>> columns;
3536 columns.reserve (dstSchema->num_fields ());
36-
// New version: fields collects the destination fields that were actually found in
// srcBatch; fieldIdx collects their positions within dstSchema; idx is the running
// position of the current destination field.
37+ std::vector<std::shared_ptr<arrow::Field>> fields;
38+ fields.reserve (dstSchema->num_fields ());
39+ std::set<ui32> fieldIdx;
40+ ui32 idx = 0 ;
3741 for (auto & field : dstSchema->fields ()) {
// Look the destination field up by name in the source schema; the code treats any
// index greater than -1 as found.
3842 const int index = srcBatch->schema ()->GetFieldIndex (field->name ());
3943 if (index > -1 ) {
// Positions are recorded only when the caller asked for a subset.
44+ if (subset) {
45+ fieldIdx.emplace (idx);
46+ }
4047 columns.push_back (srcBatch->column (index));
48+ fields.emplace_back (field);
4149 auto srcField = srcBatch->schema ()->field (index);
// When the field descriptions are equal, the column type is additionally verified
// against the destination field type.
4250 if (field->Equals (srcField)) {
4351 AFL_VERIFY (columns.back ()->type ()->Equals (field->type ()))(" event" , " cannot_use_incoming_batch" )(" reason" , " invalid_column_type" )(" column" , field->name ())
// Hunk boundary: the lines between the statement above and the continuation below are
// not visible here. NOTE(review): the Fail return below presumably belongs to the branch
// taken when the fields differ - confirm against the full file.
@@ -47,14 +55,17 @@ TConclusion<std::shared_ptr<TDataContainer>> AdaptColumnsImpl(const std::shared_
4755 (" column_type" , field->ToString (true ))(" incoming_type" , srcField->ToString (true ));
4856 return TConclusionStatus::Fail (" incompatible column types" );
4957 }
// Old version: every missing column is an error. New version: a missing column is an
// error only when no subset output was requested; otherwise it is silently skipped.
50- } else {
58+ } else if (!subset) {
5159 AFL_ERROR (NKikimrServices::ARROW_HELPER)(" event" , " not_found_column" )(" column" , field->name ())
5260 (" column_type" , field->type ()->ToString ())(" columns" , JoinSeq (" ," , srcBatch->schema ()->field_names ()));
5361 return TConclusionStatus::Fail (" not found column '" + field->name () + " '" );
5462 }
// idx advances once per destination field, whether or not it was found.
63+ ++idx;
5564 }
56-
57- return NAdapter::TDataBuilderPolicy<TDataContainer>::Build (dstSchema, std::move (columns), srcBatch->num_rows ());
// Publish the found positions together with the total number of destination fields.
65+ if (subset) {
66+ *subset = TSchemaSubset (fieldIdx, dstSchema->num_fields ());
67+ }
// The result schema is now built from the collected fields rather than reusing dstSchema.
// NOTE(review): a schema constructed from fields alone presumably does not carry any
// schema-level metadata of dstSchema, and this also applies when subset is null - confirm
// whether callers rely on the result schema being dstSchema itself.
68+ return NAdapter::TDataBuilderPolicy<TDataContainer>::Build (std::make_shared<arrow::Schema>(fields), std::move (columns), srcBatch->num_rows ());
5869}
5970
6071template <class TDataContainer , class TStringType >
@@ -114,12 +125,12 @@ std::shared_ptr<arrow::Table> TColumnOperator::Extract(const std::shared_ptr<arr
114125 return ExtractImpl (AbsentColumnPolicy, incoming, columnNames);
115126}
116127
// TColumnOperator::Adapt (arrow::RecordBatch overload): forwards to AdaptColumnsImpl.
// Diff rendering: the - lines are the previous two-argument version; the + lines add a
// TSchemaSubset* subset parameter that is passed through unchanged.
// NOTE(review): whether subset has a default value is decided by the declaration in the
// header, which is not visible here - confirm existing two-argument callers still compile.
117- NKikimr::TConclusion<std::shared_ptr<arrow::RecordBatch>> TColumnOperator::Adapt (const std::shared_ptr<arrow::RecordBatch>& incoming, const std::shared_ptr<arrow::Schema>& dstSchema) {
118- return AdaptColumnsImpl (incoming, dstSchema);
128+ NKikimr::TConclusion<std::shared_ptr<arrow::RecordBatch>> TColumnOperator::Adapt (const std::shared_ptr<arrow::RecordBatch>& incoming, const std::shared_ptr<arrow::Schema>& dstSchema, TSchemaSubset* subset ) {
129+ return AdaptColumnsImpl (incoming, dstSchema, subset );
119130}
120131
// TColumnOperator::Adapt (arrow::Table overload): forwards to AdaptColumnsImpl.
// Diff rendering: the - lines are the previous two-argument version; the + lines add a
// TSchemaSubset* subset parameter that is passed through unchanged.
// NOTE(review): whether subset has a default value is decided by the declaration in the
// header, which is not visible here - confirm existing two-argument callers still compile.
121- NKikimr::TConclusion<std::shared_ptr<arrow::Table>> TColumnOperator::Adapt (const std::shared_ptr<arrow::Table>& incoming, const std::shared_ptr<arrow::Schema>& dstSchema) {
122- return AdaptColumnsImpl (incoming, dstSchema);
132+ NKikimr::TConclusion<std::shared_ptr<arrow::Table>> TColumnOperator::Adapt (const std::shared_ptr<arrow::Table>& incoming, const std::shared_ptr<arrow::Schema>& dstSchema, TSchemaSubset* subset ) {
133+ return AdaptColumnsImpl (incoming, dstSchema, subset );
123134}
124135
125136NKikimr::TConclusion<std::shared_ptr<arrow::RecordBatch>> TColumnOperator::Reorder (const std::shared_ptr<arrow::RecordBatch>& incoming, const std::vector<std::string>& columnNames) {
0 commit comments