Skip to content

Commit

Permalink
Handle missing fields when read flatmap as struct
Browse files Browse the repository at this point in the history
Summary:
When we read flatmap as struct, the presence of a field is not decided
the same way as a normal struct field.  We need to handle this separately and
set `ScanSpec::subscript` according to the flatmap specific logic, and also
leverage this subscript in the subsequent reading to treat it as constant.

Differential Revision: D63469632
  • Loading branch information
Yuhta authored and facebook-github-bot committed Sep 26, 2024
1 parent afa43a7 commit 0677669
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 11 deletions.
8 changes: 4 additions & 4 deletions velox/dwio/common/SelectiveStructColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,11 @@ void SelectiveStructColumnReaderBase::recordParentNullsInChildren(
}
}

bool SelectiveStructColumnReaderBase::isChildConstant(
bool SelectiveStructColumnReaderBase::isChildMissing(
const velox::common::ScanSpec& childSpec) const {
// Returns true if the child has a constant set in the ScanSpec, or if the
// file doesn't have this child (in which case it will be treated as null).
return childSpec.isConstant() ||
// Returns true if the file doesn't have this child (in which case it will be
// treated as null).
return
// The below check is trying to determine if this is a missing field in a
// struct that should be constant null.
(!isRoot_ && // If we're in the root struct channel is meaningless in this
Expand Down
10 changes: 7 additions & 3 deletions velox/dwio/common/SelectiveStructColumnReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,13 @@ class SelectiveStructColumnReaderBase : public SelectiveColumnReader {
return hasDeletion_;
}

// Returns true if we'll return a constant for that childSpec (i.e. we don't
// need to read it).
bool isChildConstant(const velox::common::ScanSpec& childSpec) const;
bool isChildMissing(const velox::common::ScanSpec& childSpec) const;

bool isChildConstant(const velox::common::ScanSpec& childSpec) const {
return childSpec.isConstant() ||
childSpec.subscript() == kConstantChildSpecSubscript ||
isChildMissing(childSpec);
}

void fillOutputRowsFromMutation(vector_size_t size);

Expand Down
3 changes: 3 additions & 0 deletions velox/dwio/dwrf/reader/SelectiveFlatMapColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ class SelectiveFlatMapAsStructReader : public SelectiveStructColumnReaderBase {
!keyNodes_.empty(),
"For struct encoding, keys to project must be configured");
children_.resize(keyNodes_.size());
for (auto& childSpec : scanSpec.children()) {
childSpec->setSubscript(kConstantChildSpecSubscript);
}
for (int i = 0; i < keyNodes_.size(); ++i) {
keyNodes_[i].reader->scanSpec()->setSubscript(i);
children_[i] = keyNodes_[i].reader.get();
Expand Down
2 changes: 1 addition & 1 deletion velox/dwio/dwrf/reader/SelectiveStructColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ SelectiveStructColumnReader::SelectiveStructColumnReader(
if (childSpec->isExplicitRowNumber()) {
continue;
}
if (isChildConstant(*childSpec)) {
if (childSpec->isConstant() || isChildMissing(*childSpec)) {
childSpec->setSubscript(kConstantChildSpecSubscript);
continue;
}
Expand Down
3 changes: 2 additions & 1 deletion velox/dwio/parquet/reader/StructColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ StructColumnReader::StructColumnReader(
auto& childSpecs = scanSpec_->stableChildren();
for (auto i = 0; i < childSpecs.size(); ++i) {
auto childSpec = childSpecs[i];
if (childSpecs[i]->isConstant()) {
if (childSpec->isConstant() || isChildMissing(*childSpec)) {
childSpec->setSubscript(kConstantChildSpecSubscript);
continue;
}
if (childSpecs[i]->isExplicitRowNumber()) {
Expand Down
17 changes: 15 additions & 2 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4733,13 +4733,14 @@ TEST_F(TableScanTest, partitionKeyNotMatchPartitionKeysHandle) {
TEST_F(TableScanTest, readFlatMapAsStruct) {
constexpr int kSize = 10;
std::vector<std::string> keys = {"1", "2", "3"};
auto vector = makeRowVector({makeRowVector(
auto c0 = makeRowVector(
keys,
{
makeFlatVector<int64_t>(kSize, folly::identity),
makeFlatVector<int64_t>(kSize, folly::identity, nullEvery(5)),
makeFlatVector<int64_t>(kSize, folly::identity, nullEvery(7)),
})});
});
auto vector = makeRowVector({c0});
auto config = std::make_shared<dwrf::Config>();
config->set(dwrf::Config::FLATTEN_MAP, true);
config->set<const std::vector<uint32_t>>(dwrf::Config::MAP_FLAT_COLS, {0});
Expand All @@ -4753,6 +4754,18 @@ TEST_F(TableScanTest, readFlatMapAsStruct) {
PlanBuilder().tableScan(readSchema, {}, "", writeSchema).planNode();
auto split = makeHiveConnectorSplit(file->getPath());
AssertQueryBuilder(plan).split(split).assertResults(vector);
readSchema =
ROW({"c0"}, {ROW({"1", "4", "2"}, {BIGINT(), BIGINT(), BIGINT()})});
plan = PlanBuilder().tableScan(readSchema, {}, "", writeSchema).planNode();
split = makeHiveConnectorSplit(file->getPath());
auto expected = makeRowVector({makeRowVector(
{"1", "4", "2"},
{
c0->childAt(0),
makeNullConstant(TypeKind::BIGINT, kSize),
c0->childAt(1),
})});
AssertQueryBuilder(plan).split(split).assertResults(expected);
}

TEST_F(TableScanTest, dynamicFilters) {
Expand Down

0 comments on commit 0677669

Please sign in to comment.