Skip to content

Commit

Permalink
[Enhancement] Support dict filter for struct type in parquet (StarRoc…
Browse files Browse the repository at this point in the history
…ks#28843)

* [Enhancement] Support dict filter for struct type in parquet

Signed-off-by: zombee0 <ewang2027@gmail.com>

* Use field names to decouple the indexes of the struct child readers, column types, and field columns

Signed-off-by: zombee0 <ewang2027@gmail.com>
  • Loading branch information
zombee0 authored Aug 18, 2023
1 parent 4264907 commit 4ee6465
Show file tree
Hide file tree
Showing 19 changed files with 942 additions and 399 deletions.
12 changes: 11 additions & 1 deletion be/src/column/struct_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ Columns& StructColumn::fields_column() {
return _fields;
}

ColumnPtr StructColumn::field_column(const std::string& field_name) {
ColumnPtr StructColumn::field_column(const std::string& field_name) const {
for (size_t i = 0; i < _field_names.size(); i++) {
if (field_name == _field_names[i]) {
return _fields[i];
Expand All @@ -494,6 +494,16 @@ ColumnPtr StructColumn::field_column(const std::string& field_name) {
return nullptr;
}

// Mutable lookup of a struct subfield column by name.
//
// Scans _field_names for the first entry equal to 'field_name' and returns a
// reference to the column stored at the same index in _fields.
//
// If no name matches, a DCHECK fires and the first field column is returned
// as a fallback.
// NOTE(review): the fallback indexes _fields[0], so it presumably relies on
// the struct having at least one field -- confirm with callers.
ColumnPtr& StructColumn::field_column(const std::string& field_name) {
    const size_t name_count = _field_names.size();
    size_t pos = 0;
    // Advance until we hit the requested name or run out of names.
    while (pos < name_count && !(field_name == _field_names[pos])) {
        ++pos;
    }
    if (pos < name_count) {
        return _fields[pos];
    }
    DCHECK(false) << "Struct subfield name: " << field_name << " not found!";
    return _fields[0];
}

Status StructColumn::unfold_const_children(const starrocks::TypeDescriptor& type) {
DCHECK(type.children.size() == _fields.size()) << "Struct schema does not match data's";
auto num_fields = type.children.size();
Expand Down
4 changes: 3 additions & 1 deletion be/src/column/struct_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,9 @@ class StructColumn final : public ColumnFactory<Column, StructColumn> {

Columns& fields_column();

ColumnPtr field_column(const std::string& field_name);
ColumnPtr field_column(const std::string& field_name) const;

// Returns a mutable reference to the field column whose name equals 'field_name'.
// When no field has that name the implementation DCHECK-fails and falls back to
// the first field column.
ColumnPtr& field_column(const std::string& field_name);

// Returns the subfield names of this struct column (read-only view of _field_names).
const std::vector<std::string>& field_names() const { return _field_names; }

Expand Down
10 changes: 10 additions & 0 deletions be/src/exprs/expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,16 @@ int Expr::get_slot_ids(std::vector<SlotId>* slot_ids) const {
return n;
}

// Default implementation: forwards the request to every child expression and
// returns the sum of the counts the children report. This node itself adds
// nothing to 'subfields'.
int Expr::get_subfields(std::vector<std::vector<std::string>>* subfields) const {
    int collected = 0;
    for (const auto& child : _children) {
        collected += child->get_subfields(subfields);
    }
    return collected;
}

Expr* Expr::copy(ObjectPool* pool, Expr* old_expr) {
auto new_expr = old_expr->clone(pool);
for (auto child : old_expr->_children) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/exprs/expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ class Expr {
// Returns the number of slots added to the vector
virtual int get_slot_ids(std::vector<SlotId>* slot_ids) const;

// Appends the subfield name lists used by this expression tree to 'subfields'
// (one std::vector<std::string> per entry).
// Returns the number of entries added to the vector
virtual int get_subfields(std::vector<std::vector<std::string>>* subfields) const;

/// Create expression tree from the list of nodes contained in texpr within 'pool'.
/// Returns the root of expression tree in 'expr' and the corresponding ExprContext in
/// 'ctx'.
Expand Down
5 changes: 5 additions & 0 deletions be/src/exprs/subfield_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ class SubfieldExpr final : public Expr {

// Copy-constructs a new SubfieldExpr from this one, hands it to 'pool' via
// ObjectPool::add, and returns whatever add() returns.
Expr* clone(ObjectPool* pool) const override {
    auto* duplicate = new SubfieldExpr(*this);
    return pool->add(duplicate);
}

// Reports the subfield names used by this expression: appends a copy of
// _used_subfield_names as a single entry to 'subfields' and returns 1.
// Child expressions are not visited here.
int get_subfields(std::vector<std::vector<std::string>>* subfields) const override {
    constexpr int kEntriesAdded = 1;
    subfields->emplace_back(_used_subfield_names);
    return kEntriesAdded;
}

private:
std::vector<std::string> _used_subfield_names;
};
Expand Down
Loading

0 comments on commit 4ee6465

Please sign in to comment.