Skip to content

Commit

Permalink
[fix](stats) Fix auto analyze (apache#20426)
Browse files Browse the repository at this point in the history
We only reanalyze those partitions whose lastVisibleTime is later than the job's update time, so we shouldn't set this field when creating system jobs.
  • Loading branch information
Kikyou1997 authored Aug 25, 2023
1 parent e3db0fd commit 006c888
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ public List<AnalysisInfo> constructAnalysisInfo(DatabaseIf<? extends TableIf> db
.setScheduleType(AnalysisInfo.ScheduleType.ONCE)
.setState(AnalysisState.PENDING)
.setTaskIds(new ArrayList<>())
.setLastExecTimeInMs(System.currentTimeMillis())
.setJobType(JobType.SYSTEM).build();
analysisInfos.add(jobInfo);
}
Expand All @@ -155,8 +154,7 @@ private void analyzePeriodically() {
}

@VisibleForTesting
public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
long lastExecTimeInMs = jobInfo.lastExecTimeInMs;
protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
TableIf table = StatisticsUtil
.findTable(jobInfo.catalogName, jobInfo.dbName, jobInfo.tblName);
TableStats tblStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId());
Expand All @@ -165,7 +163,7 @@ public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
return null;
}

Set<String> needRunPartitions = findReAnalyzeNeededPartitions(table, lastExecTimeInMs);
Set<String> needRunPartitions = findReAnalyzeNeededPartitions(table, tblStats);

if (needRunPartitions.isEmpty()) {
return null;
Expand All @@ -175,12 +173,16 @@ public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
}

@VisibleForTesting
public Set<String> findReAnalyzeNeededPartitions(TableIf table, long lastExecTimeInMs) {
protected Set<String> findReAnalyzeNeededPartitions(TableIf table, TableStats tableStats) {
if (tableStats == null) {
return table.getPartitionNames().stream().map(table::getPartition)
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
}
return table.getPartitionNames().stream()
.map(table::getPartition)
.filter(Partition::hasData)
.filter(partition ->
partition.getVisibleVersionTime() >= lastExecTimeInMs).map(Partition::getName)
partition.getVisibleVersionTime() >= tableStats.updatedTime).map(Partition::getName)
.collect(Collectors.toSet());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public TableStats findTableStatsStatus(long tblId) {

new MockUp<StatisticsAutoAnalyzer>() {
@Mock
public Set<String> findReAnalyzeNeededPartitions(TableIf table, long lastExecTimeInMs) {
protected Set<String> findReAnalyzeNeededPartitions(TableIf table, TableStats tableStats) {
Set<String> partitionNames = new HashSet<>();
partitionNames.add("p1");
partitionNames.add("p2");
Expand Down

0 comments on commit 006c888

Please sign in to comment.