Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[improvement](statistics)Support identical column name in different index. #32792

Merged
merged 1 commit into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.AnalysisManager;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.ColumnStatistic;

Expand Down Expand Up @@ -138,23 +139,23 @@ public TableIf getTable() {

public ShowResultSet constructResultSet(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics) {
List<List<String>> result = Lists.newArrayList();
AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
columnStatistics.forEach(p -> {
if (p.second.isUnKnown) {
return;
}

List<String> row = Lists.newArrayList();
row.add(p.first.first);
// p data structure is Pair<Pair<IndexName, ColumnName>, ColumnStatistic>
row.add(p.first.second);
row.add(p.first.first);
row.add(String.valueOf(p.second.count));
row.add(String.valueOf(p.second.ndv));
row.add(String.valueOf(p.second.numNulls));
row.add(String.valueOf(p.second.dataSize));
row.add(String.valueOf(p.second.avgSizeByte));
row.add(String.valueOf(p.second.minExpr == null ? "N/A" : p.second.minExpr.toSql()));
row.add(String.valueOf(p.second.maxExpr == null ? "N/A" : p.second.maxExpr.toSql()));
ColStatsMeta colStatsMeta = Env.getCurrentEnv().getAnalysisManager().findColStatsMeta(table.getId(),
p.first.first);
ColStatsMeta colStatsMeta = analysisManager.findColStatsMeta(table.getId(), p.first.first, p.first.second);
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.analysisMethod));
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.analysisType));
row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.jobType));
Expand Down
38 changes: 12 additions & 26 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -1279,11 +1279,11 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
if (tblStats == null) {
return true;
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs(getSchemaAllIndexes(false)
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toSet()))) {
.collect(Collectors.toSet())))) {
return true;
}
long rowCount = getRowCount();
Expand All @@ -1296,34 +1296,20 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
}

@Override
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId());
Set<String> allPartitions = getPartitionNames().stream().map(this::getPartition)
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
if (tableStats == null) {
Map<String, Set<String>> ret = Maps.newHashMap();
for (Column col : getSchemaAllIndexes(false)) {
if (StatisticsUtil.isUnsupportedType(col.getType())) {
public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) {
List<Pair<String, String>> ret = Lists.newArrayList();
// Check the schema of all indexes for each given column name,
// If the column name exists in the index, add the <IndexName, ColumnName> pair to return list.
for (String column : columns) {
for (MaterializedIndexMeta meta : indexIdToMeta.values()) {
Column col = meta.getColumnByName(column);
if (col == null || StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
ret.put(col.getName(), allPartitions);
ret.add(Pair.of(getIndexNameById(meta.getIndexId()), column));
}
return ret;
}
Map<String, Set<String>> colToPart = new HashMap<>();
for (Column col : getSchemaAllIndexes(false)) {
if (StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName());
Set<String> partitions = getPartitionNames().stream()
.map(this::getPartition)
.filter(Partition::hasData)
.filter(partition -> partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
.collect(Collectors.toSet());
colToPart.put(col.getName(), partitions);
}
return colToPart;
return ret;
}

@Override
Expand Down
11 changes: 6 additions & 5 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.FeMetaVersion;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.common.util.QueryableReentrantReadWriteLock;
Expand Down Expand Up @@ -647,11 +648,6 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
return true;
}

@Override
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
return Collections.emptyMap();
}

@Override
public List<Long> getChunkSizes() {
throw new NotImplementedException("getChunkSized not implemented");
Expand All @@ -661,4 +657,9 @@ public List<Long> getChunkSizes() {
public long fetchRowCount() {
return 0;
}

@Override
public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) {
return Lists.newArrayList();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.doris.cluster.ClusterNamespace;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.persist.AlterConstraintLog;
import org.apache.doris.statistics.AnalysisInfo;
Expand Down Expand Up @@ -184,7 +185,11 @@ default long getRowCountForNereids() {

boolean needReAnalyzeTable(TableStatsMeta tblStats);

Map<String, Set<String>> findReAnalyzeNeededPartitions();
/**
* @param columns Set of column names.
* @return List of pairs. Each pair is <IndexName, ColumnName>. For external table, index name is table name.
*/
List<Pair<String, String>> getColumnIndexPairs(Set<String> columns);

// Get all the chunk sizes of this table. Now, only HMS external table implemented this interface.
// For HMS external table, the return result is a list of all the files' size.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.constraint.Constraint;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.common.util.Util;
Expand All @@ -36,7 +37,7 @@
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.doris.thrift.TTableDescriptor;

import com.google.common.collect.Sets;
import com.google.common.collect.Lists;
import com.google.gson.annotations.SerializedName;
import lombok.Getter;
import org.apache.commons.lang3.NotImplementedException;
Expand All @@ -46,7 +47,6 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand Down Expand Up @@ -322,24 +322,30 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
if (tblStats == null) {
return true;
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs(
getBaseSchema()
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toSet()))) {
.collect(Collectors.toSet())))) {
return true;
}
return System.currentTimeMillis()
- tblStats.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis();
}

@Override
public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
HashSet<String> partitions = Sets.newHashSet();
// TODO: Find a way to collect external table partitions that need to be analyzed.
partitions.add("Dummy Partition");
return getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.collect(Collectors.toMap(Column::getName, k -> partitions));
public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) {
List<Pair<String, String>> ret = Lists.newArrayList();
for (String column : columns) {
Column col = getColumn(column);
if (col == null || StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
// External table put table name as index name.
ret.add(Pair.of(String.valueOf(name), column));
}
return ret;
}

@Override
Expand Down
37 changes: 20 additions & 17 deletions fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2532,16 +2532,18 @@ private void handleShowColumnStats() throws AnalysisException {
private void getStatsForAllColumns(List<Pair<Pair<String, String>, ColumnStatistic>> columnStatistics,
TableIf tableIf) throws AnalysisException {
List<ResultRow> resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId());
// row[4] is index id, row[5] is column name.
for (ResultRow row : resultRows) {
String indexName = "N/A";
String indexName = tableIf.getName();
long indexId = Long.parseLong(row.get(4));
if (indexId != -1) {
indexName = ((OlapTable) tableIf).getIndexNameById(indexId);
if (indexName == null) {
continue;
}
if (tableIf instanceof OlapTable) {
OlapTable olapTable = (OlapTable) tableIf;
indexName = olapTable.getIndexNameById(indexId == -1 ? olapTable.getBaseIndexId() : indexId);
}
if (indexName == null) {
continue;
}
columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), ColumnStatistic.fromResultRow(row)));
columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row)));
}
}

Expand All @@ -2558,28 +2560,29 @@ private void getStatsForSpecifiedColumns(List<Pair<Pair<String, String>, ColumnS
indexIds.add(-1L);
}
for (long indexId : indexIds) {
String indexName = "N/A";
if (indexId != -1) {
indexName = ((OlapTable) tableIf).getIndexNameById(indexId);
if (indexName == null) {
continue;
}
String indexName = tableIf.getName();
if (tableIf instanceof OlapTable) {
OlapTable olapTable = (OlapTable) tableIf;
indexName = olapTable.getIndexNameById(indexId == -1 ? olapTable.getBaseIndexId() : indexId);
}
if (indexName == null) {
continue;
}
// Show column statistics in columnStatisticsCache.
if (showCache) {
ColumnStatistic columnStatistic = Env.getCurrentEnv().getStatisticsCache().getColumnStatistics(
tableIf.getDatabase().getCatalog().getId(),
tableIf.getDatabase().getId(), tableIf.getId(), indexId, colName);
columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic));
columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic));
} else if (partitionNames == null) {
ColumnStatistic columnStatistic =
StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), indexId, colName);
columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic));
columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic));
} else {
String finalIndexName = indexName;
columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName,
colName, partitionNames.getPartitionNames())
.stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s))
.stream().map(s -> Pair.of(Pair.of(finalIndexName, colName), s))
.collect(Collectors.toList()));
}
}
Expand Down Expand Up @@ -2983,7 +2986,7 @@ private void handleShowAnalyzeTaskStatus() {
if (table instanceof OlapTable && analysisInfo.indexId != -1) {
row.add(((OlapTable) table).getIndexNameById(analysisInfo.indexId));
} else {
row.add("N/A");
row.add(table.getName());
}
row.add(analysisInfo.message);
row.add(TimeUtils.DATETIME_FORMAT.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.statistics;

import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonUtils;
Expand All @@ -35,7 +36,6 @@
import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;

Expand Down Expand Up @@ -95,8 +95,8 @@ public enum ScheduleType {
@SerializedName("tblId")
public final long tblId;

// TODO: Map here is wired, List is enough
public final Map<String, Set<String>> colToPartitions;
// Pair<IndexName, ColumnName>
public final List<Pair<String, String>> jobColumns;

public final Set<String> partitionNames;

Expand Down Expand Up @@ -200,7 +200,7 @@ public enum ScheduleType {
public final boolean userInject;

public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
List<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
int samplePercent, long sampleRows, int maxBucketNum, long periodTimeInMs, String message,
long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType,
Expand All @@ -213,7 +213,7 @@ public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId,
this.catalogId = catalogId;
this.dbId = dbId;
this.tblId = tblId;
this.colToPartitions = colToPartitions;
this.jobColumns = jobColumns;
this.partitionNames = partitionNames;
this.colName = colName;
this.indexId = indexId;
Expand Down Expand Up @@ -268,8 +268,8 @@ public String toString() {
if (maxBucketNum > 0) {
sj.add("MaxBucketNum: " + maxBucketNum);
}
if (colToPartitions != null) {
sj.add("colToPartitions: " + getColToPartitionStr());
if (jobColumns != null) {
sj.add("jobColumns: " + getJobColumns());
}
if (lastExecTimeInMs > 0) {
sj.add("LastExecTime: " + StatisticsUtil.getReadableTime(lastExecTimeInMs));
Expand Down Expand Up @@ -301,12 +301,12 @@ public void addTaskId(long taskId) {
taskIds.add(taskId);
}

public String getColToPartitionStr() {
if (colToPartitions == null || colToPartitions.isEmpty()) {
public String getJobColumns() {
if (jobColumns == null || jobColumns.isEmpty()) {
return "";
}
Gson gson = new Gson();
return gson.toJson(colToPartitions);
return gson.toJson(jobColumns);
}

@Override
Expand Down
Loading
Loading