Skip to content

Commit

Permalink
[Fix](multi-catalog) Fix load string dict issue for transactional hiv…
Browse files Browse the repository at this point in the history
…e tables. (apache#23306)

Fix the string-dictionary loading issue for transactional Hive tables. The column name needs to be passed as 'row.column_name'.

apache/doris-thirdparty#112
  • Loading branch information
kaka11chen authored Aug 25, 2023
1 parent 2b6d876 commit a3a951c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion be/src/apache-orc
12 changes: 7 additions & 5 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,22 +316,24 @@ Status OrcReader::_init_read_columns() {
_missing_cols.emplace_back(col_name);
} else {
int pos = std::distance(orc_cols_lower_case.begin(), iter);
std::string read_col;
if (_is_acid && i < _column_names->size() - TransactionalHive::READ_PARAMS.size()) {
auto read_col = fmt::format(
read_col = fmt::format(
"{}.{}",
TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
orc_cols[pos]);
_read_cols.emplace_back(read_col);
} else {
_read_cols.emplace_back(orc_cols[pos]);
read_col = orc_cols[pos];
_read_cols.emplace_back(read_col);
}
_read_cols_lower_case.emplace_back(col_name);
// For hive engine, store the orc column name to schema column name map.
// This is for Hive 1.x orc file with internal column name _col0, _col1...
if (_is_hive) {
_file_col_to_schema_col[orc_cols[pos]] = col_name;
_removed_acid_file_col_name_to_schema_col[orc_cols[pos]] = col_name;
}
_col_name_to_file_col_name[col_name] = orc_cols[pos];
_col_name_to_file_col_name[col_name] = read_col;
}
}
return Status::OK();
Expand Down Expand Up @@ -804,7 +806,7 @@ Status OrcReader::_init_select_types(const orc::Type& type, int idx) {
// For hive engine, translate the column name in orc file to schema column name.
// This is for Hive 1.x which use internal column name _col0, _col1...
if (_is_hive) {
name = _file_col_to_schema_col[type.getFieldName(i)];
name = _removed_acid_file_col_name_to_schema_col[type.getFieldName(i)];
} else {
name = _get_field_name_lower_case(&type, i);
}
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/exec/format/orc/vorc_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,10 +510,10 @@ class OrcReader : public GenericReader {
std::list<std::string> _read_cols_lower_case;
std::list<std::string> _missing_cols;
std::unordered_map<std::string, int> _colname_to_idx;
// Column name in Orc file to column name to schema.
// Column name in the Orc file with the acid prefix removed (the leading "row.") to column name in the schema.
// This is used for Hive 1.x which use internal column name in Orc file.
// _col0, _col1...
std::unordered_map<std::string, std::string> _file_col_to_schema_col;
std::unordered_map<std::string, std::string> _removed_acid_file_col_name_to_schema_col;
// Flag for hive engine. True if the external table engine is Hive.
bool _is_hive = false;
std::unordered_map<std::string, std::string> _col_name_to_file_col_name;
Expand Down
4 changes: 1 addition & 3 deletions regression-test/pipeline/p0/conf/regression-conf.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ testDirectories = ""
// this groups will not be executed
excludeGroups = ""
// this suites will not be executed

excludeSuites = "test_full_compaction,test_default_limit,test_profile,test_broker_load,test_spark_load,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,test_transactional_hive,nereids_delete_mow_partial_update,test_hdfs_tvf"

excludeSuites = "test_full_compaction,test_default_limit,test_profile,test_broker_load,test_spark_load,test_refresh_mtmv,test_bitmap_filter,test_export_parquet,test_doris_jdbc_catalog,nereids_delete_mow_partial_update,test_hdfs_tvf"
// this directories will not be executed
excludeDirectories = "workload_manager_p1"

Expand Down

0 comments on commit a3a951c

Please sign in to comment.