Skip to content

Commit

Permalink
IMPALA-11744: Table mask view should preserve the original column ord…
Browse files Browse the repository at this point in the history
…er in Hive

Ranger provides column masking and row filtering policies to mask
sensitive data for specific users/groups. When a table should be masked
in a query, Impala replaces it with a table mask view that exposes the
columns with masked expressions.

After IMPALA-9661, only selected columns are exposed in the table mask
view. However, the columns of the view are exposed in the order in which
they are registered. If the registration order differs from the column
order in the table, STAR expansion will produce the columns in the wrong
order.

To be specific, let's say table 'tbl' with 3 columns a, b, c should be
masked in the following query:
  select b, * from tbl;
Ideally Impala should replace the TableRef of 'tbl' with a table mask
view as:
  select b, * from (
    select mask(a) a, mask(b) b, mask(c) c from tbl
  ) t;

Currently, the rewritten query is
  select b, * from (
    select mask(b) b, mask(a) a, mask(c) c from tbl
  ) t;
This incorrectly expands the STAR as "b, a, c" in the re-analyze phase.

The cause is that column 'b' is registered earlier than all other
columns. This patch fixes it by sorting the selected columns based on
their original order in the table.

Tests:
 - Add tests for selecting STAR together with normal columns on tables and views.

Change-Id: Ic83d78312b19fa2c5ab88ac4f359bfabaeaabce6
Reviewed-on: http://gerrit.cloudera.org:8080/19279
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
  • Loading branch information
stiga-huang authored and Impala Public Jenkins committed Dec 6, 2022
1 parent 6838e98 commit 3673784
Show file tree
Hide file tree
Showing 6 changed files with 474 additions and 6 deletions.
10 changes: 7 additions & 3 deletions fe/src/main/java/org/apache/impala/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;

Expand Down Expand Up @@ -939,7 +938,10 @@ public TableRef resolveTableMask(TableRef resolvedTableRef) throws AnalysisExcep
dbName = resolvedTableRef.getTable().getDb().getName();
tblName = resolvedTableRef.getTable().getName();
}
List<Column> columns = resolvedTableRef.getColumns();
// The selected columns should be in the same relative order as they are in the
// corresponding Hive table so that the order of the SelectListItems in the table
// mask view (if the table needs masking or filtering) is correct.
List<Column> columns = resolvedTableRef.getSelectedColumnsInHiveOrder();
TableMask tableMask = new TableMask(authChecker, dbName, tblName, columns, user_);
try {
if (resolvedTableRef instanceof CollectionTableRef) {
Expand Down Expand Up @@ -1609,7 +1611,9 @@ private void registerColumnPrivReq(SlotDescriptor slotDesc) {
}

/**
* Register scalar columns. Used in resolving column mask.
* Register columns for resolving column mask. The order in which columns are registered
* is not necessarily the same as the relative order of those columns in the
* corresponding Hive table.
*/
public void registerColumnForMasking(SlotDescriptor slotDesc) {
Preconditions.checkNotNull(slotDesc.getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,11 @@ public List<String> getColLabels() {
return queryStmt_.getColLabels();
}

/**
 * Returns the result of {@code view_.getColumnsInHiveOrder()}, i.e. the column list
 * is taken from the view referenced by this inline view rather than from a table.
 * NOTE(review): assumes view_ is non-null whenever this is called -- confirm.
 */
@Override
public List<Column> getColumnsInHiveOrder() {
return view_.getColumnsInHiveOrder();
}

/** Returns the {@code view_} field as is. NOTE(review): may be null -- confirm. */
public FeView getView() { return view_; }

/**
 * Returns the {@code isTableMaskingView_} flag; presumably true when this inline view
 * was generated as a table mask view -- verify where the flag is set.
 */
public boolean isTableMaskingView() { return isTableMaskingView_; }
Expand Down
33 changes: 30 additions & 3 deletions fe/src/main/java/org/apache/impala/analysis/TableRef.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
Expand Down Expand Up @@ -156,7 +157,7 @@ public enum ZippingUnnestType {
protected boolean exposeNestedColumnsByTableMaskView_ = false;

// Columns referenced in the query. Used in resolving column mask.
protected Map<String, Column> columns_ = new LinkedHashMap<>();
protected Map<String, Column> columns_ = new HashMap<>();

// Time travel spec of this table ref. It contains information specified in the
// FOR SYSTEM_TIME AS OF <timestamp> or FOR SYSTEM_TIME AS OF <version> clause.
Expand Down Expand Up @@ -778,8 +779,34 @@ public void registerColumn(Column column) {
columns_.put(column.getName(), column);
}

public List<Column> getColumns() {
return new ArrayList<>(columns_.values());
/**
 * Returns the columns of the table behind this table ref by delegating to
 * {@code getTable().getColumnsInHiveOrder()}.
 *
 * @return an unmodifiable list of all columns, but with partition columns at the end of
 * the list rather than the beginning. This is equivalent to the order in which Hive
 * enumerates columns.
 */
public List<Column> getColumnsInHiveOrder() {
return getTable().getColumnsInHiveOrder();
}

/**
 * Returns the columns registered in {@code columns_}, arranged in the order given by
 * {@link #getColumnsInHiveOrder()}. Registered columns whose names do not occur in that
 * list (e.g. virtual columns) are placed after all the others.
 *
 * @return a new list holding every registered column exactly once
 * @throws IllegalStateException if the result size differs from the number of
 *     registered columns
 */
public List<Column> getSelectedColumnsInHiveOrder() {
  // Seed an insertion-ordered map with one empty (null) slot per column, so that the
  // iteration order of the map follows getColumnsInHiveOrder().
  Map<String, Column> slotsByName = new LinkedHashMap<>();
  for (Column tableCol : getColumnsInHiveOrder()) {
    slotsByName.put(tableCol.getName(), null);
  }
  // Fill in the slots of the registered columns. Re-inserting an existing key keeps its
  // position; names not seen above (virtual columns) are appended at the end.
  for (Map.Entry<String, Column> registered : columns_.entrySet()) {
    slotsByName.put(registered.getKey(), registered.getValue());
  }
  // Keep only the slots that were actually filled.
  List<Column> selected = new ArrayList<>();
  for (Column slot : slotsByName.values()) {
    if (slot == null) continue;
    selected.add(slot);
  }
  // Every registered column must have made it into the result.
  Preconditions.checkState(selected.size() == columns_.size(),
      "missing columns: " + selected.size() + " != " + columns_.size());
  return selected;
}

void migratePropertiesTo(TableRef other) {
Expand Down
Loading

0 comments on commit 3673784

Please sign in to comment.