Skip to content

Commit

Permalink
[New feature](statistics) Step1: Statistics collection framework
Browse files Browse the repository at this point in the history
Framework code for statistics collection,
containing only the main data structures and no implementation details.
This PR does not affect any existing code,
and users are not yet able to create statistics jobs.
  • Loading branch information
EmmyMiao87 committed Jan 25, 2022
1 parent ee0037e commit a9da0d2
Show file tree
Hide file tree
Showing 17 changed files with 726 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.analysis;

/**
 * Collect statistics about a database.
 *
 * <p>Syntax:
 * <pre>
 * ANALYZE [[ db_name.tb_name ] [( column_name [, ...] )], ...] [ PROPERTIES(...) ]
 * </pre>
 *
 * <ul>
 * <li>db_name.tb_name: collect table and column statistics from tb_name</li>
 * <li>column_name: collect column statistics from column_name</li>
 * <li>properties: properties of statistics jobs</li>
 * </ul>
 *
 * <p>NOTE(review): the class body is currently empty, so none of the syntax
 * elements above are parsed or stored here yet.
 */
public class AnalyzeStmt extends DdlStmt {

}
14 changes: 7 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,6 @@

package org.apache.doris.analysis;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.Function;
import org.apache.doris.catalog.FunctionSet;
Expand All @@ -45,6 +38,13 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;


public class CastExpr extends Expr {
private static final Logger LOG = LogManager.getLogger(CastExpr.class);
Expand Down
22 changes: 22 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,10 @@
import org.apache.doris.qe.VariableMgr;
import org.apache.doris.resource.Tag;
import org.apache.doris.service.FrontendOptions;
import org.apache.doris.statistics.StatisticsJobManager;
import org.apache.doris.statistics.StatisticsJobScheduler;
import org.apache.doris.statistics.StatisticsManager;
import org.apache.doris.statistics.StatisticsTaskScheduler;
import org.apache.doris.system.Backend;
import org.apache.doris.system.Backend.BackendState;
import org.apache.doris.system.Frontend;
Expand Down Expand Up @@ -412,7 +415,11 @@ public class Catalog {
private DeployManager deployManager;

private TabletStatMgr tabletStatMgr;
// statistics
private StatisticsManager statisticsManager;
private StatisticsJobManager statisticsJobManager;
private StatisticsJobScheduler statisticsJobScheduler;
private StatisticsTaskScheduler statisticsTaskScheduler;

private PaloAuth auth;

Expand Down Expand Up @@ -568,7 +575,11 @@ private Catalog(boolean isCheckpointCatalog) {
this.globalTransactionMgr = new GlobalTransactionMgr(this);

this.tabletStatMgr = new TabletStatMgr();
// statistics
this.statisticsManager = new StatisticsManager();
this.statisticsJobManager = new StatisticsJobManager();
this.statisticsJobScheduler = new StatisticsJobScheduler();
this.statisticsTaskScheduler = new StatisticsTaskScheduler();

this.auth = new PaloAuth();
this.domainResolver = new DomainResolver(auth);
Expand Down Expand Up @@ -724,9 +735,20 @@ public Checkpoint getCheckpointer() {
return checkpointer;
}

// statistics
/** Returns the statistics manager held by this catalog. */
public StatisticsManager getStatisticsManager() {
    return this.statisticsManager;
}
/** Returns the statistics job manager held by this catalog. */
public StatisticsJobManager getStatisticsJobManager() {
    return this.statisticsJobManager;
}
/** Returns the statistics job scheduler held by this catalog. */
public StatisticsJobScheduler getStatisticsJobScheduler() {
    return this.statisticsJobScheduler;
}
/** Returns the statistics task scheduler held by this catalog. */
public StatisticsTaskScheduler getStatisticsTaskScheduler() {
    return this.statisticsTaskScheduler;
}


// Use tryLock to avoid potential dead lock
private boolean tryLock(boolean mustLock) {
Expand Down
19 changes: 19 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -1582,4 +1582,23 @@ public class Config extends ConfigBase {
*/
@ConfField
public static boolean allow_replica_on_same_host = false;

// statistics
/*
 * The maximum number of unfinished statistics jobs.
 */
@ConfField(mutable = true, masterOnly = true)
public static int max_statistics_job_num = 20;
/*
 * The concurrency of statistics tasks.
 */
// TODO: change it to mutable = true
@ConfField(mutable = false, masterOnly = true)
public static int concurrency_statistics_task_num = 1;
/*
 * The default sample percentage.
 * The value ranges from 0 to 100; 100 means no sampling, i.e. all data is fetched.
 */
@ConfField(mutable = true, masterOnly = true)
public static int default_sample_percentage = 10;
}
Original file line number Diff line number Diff line change
Expand Up @@ -298,20 +298,6 @@ private PlanFragment createTableFunctionFragment(PlanNode node, PlanFragment chi
return childFragment;
}

/**
 * Decides whether the broadcast cost should be treated as the smaller one.
 *
 * When broadcastCost and partitionCost are equal there is no uniform standard for which
 * join implementation is better: some scenarios suit broadcast join, others suit shuffle join.
 * The session's preferred join method therefore breaks the tie. If it is "broadcast"
 * (case-insensitive), equal costs count as broadcast being smaller; otherwise broadcast
 * must be strictly cheaper.
 *
 * @param broadcastCost estimated cost of the broadcast join
 * @param partitionCost estimated cost of the partitioned (shuffle) join
 * @return true if the broadcast join should be considered the cheaper choice
 */
private boolean isBroadcastCostSmaller(long broadcastCost, long partitionCost) {
    final boolean prefersBroadcast = ConnectContext.get()
            .getSessionVariable()
            .getPreferJoinMethod()
            .equalsIgnoreCase("broadcast");
    // A tie goes to broadcast only when the session prefers it.
    return prefersBroadcast
            ? broadcastCost <= partitionCost
            : broadcastCost < partitionCost;
}

/**
* There are 4 kinds of distributed hash join methods in Doris:
* Colocate, Bucket Shuffle, Broadcast, Shuffle
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import java.util.List;

/**
 * A statistics task that directly collects statistics by reading FE meta.
 *
 * <p>The collection logic is not implemented yet: {@link #call()} is a placeholder
 * that returns {@code null}.
 */
public class MetaStatisticsTask extends StatisticsTask {

    /**
     * Creates the task by forwarding all arguments unchanged to the parent constructor.
     *
     * @param jobId           id passed through to {@code StatisticsTask}
     * @param granularityDesc granularity descriptor passed through to {@code StatisticsTask}
     * @param categoryDesc    category descriptor passed through to {@code StatisticsTask}
     * @param statsTypeList   statistics types passed through to {@code StatisticsTask}
     */
    public MetaStatisticsTask(
            long jobId,
            StatsGranularityDesc granularityDesc,
            StatsCategoryDesc categoryDesc,
            List<StatsType> statsTypeList) {
        super(jobId, granularityDesc, categoryDesc, statsTypeList);
    }

    /**
     * Placeholder for the meta-based collection.
     *
     * @return currently always {@code null}
     */
    @Override
    public StatisticsTaskResult call() throws Exception {
        // TODO: collect the statistics from FE meta and build the result.
        return null;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.analysis.SelectStmt;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;

import java.util.List;
import java.util.Map;

/**
 * A statistics task that collects statistics by executing a query.
 * The results of the query are returned as a {@link StatisticsTaskResult}.
 */
public class SQLStatisticsTask extends StatisticsTask {
    // Query handed to executeQuery(); nothing in this class assigns it yet
    // (constructQuery() is still a TODO), so it is null at the moment.
    private SelectStmt query;

    /**
     * Creates the task by forwarding all arguments unchanged to the parent constructor.
     *
     * @param jobId           id passed through to {@code StatisticsTask}
     * @param granularityDesc granularity descriptor passed through to {@code StatisticsTask}
     * @param categoryDesc    category descriptor passed through to {@code StatisticsTask}
     * @param statsTypeList   statistics types passed through to {@code StatisticsTask}
     */
    public SQLStatisticsTask(long jobId, StatsGranularityDesc granularityDesc,
                             StatsCategoryDesc categoryDesc, List<StatsType> statsTypeList) {
        super(jobId, granularityDesc, categoryDesc, statsTypeList);
    }

    /**
     * Runs the three steps of the task: construct the query, execute it, and
     * convert the query result into a {@link StatisticsTaskResult}.
     *
     * @return the result built by {@link #constructTaskResult(List)}
     * @throws Exception if any step fails; while {@link #executeQuery(SelectStmt)}
     *                   is unimplemented and returns {@code null}, the result
     *                   construction fails with a {@link NullPointerException}
     */
    @Override
    public StatisticsTaskResult call() throws Exception {
        // TODO
        // step1: construct query by statsDescList
        constructQuery();
        // step2: execute query
        // the result should be sequence by @statsTypeList
        List<String> queryResultList = executeQuery(query);
        // step3: construct StatisticsTaskResult by query result
        // Return the constructed result instead of discarding it.
        return constructTaskResult(queryResultList);
    }

    /**
     * Builds the query to execute. Not implemented yet; subclasses may override.
     */
    protected void constructQuery() {
        // TODO
        // step1: construct FROM by @granularityDesc
        // step2: construct SELECT LIST by @statsTypeList
    }

    /**
     * Executes the given query. Not implemented yet.
     *
     * @param query the query to run
     * @return currently always {@code null}; per the contract used by
     *         {@link #constructTaskResult(List)}, the values are expected to be
     *         in the same order as {@code statsTypeList}
     */
    protected List<String> executeQuery(SelectStmt query) {
        // TODO (ML)
        return null;
    }

    /**
     * Pairs each statistics type with the query result value at the same index.
     *
     * @param queryResultList query result values, ordered like {@code statsTypeList}
     * @return a task result holding the type-to-value mapping
     * @throws NullPointerException  if {@code queryResultList} is {@code null}
     * @throws IllegalStateException if the number of values differs from the
     *                               number of statistics types
     */
    protected StatisticsTaskResult constructTaskResult(List<String> queryResultList) {
        Preconditions.checkNotNull(queryResultList, "query result list must not be null");
        Preconditions.checkState(statsTypeList.size() == queryResultList.size(),
                "expected %s statistics values but the query returned %s",
                statsTypeList.size(), queryResultList.size());
        Map<StatsType, String> statsTypeToValue = Maps.newHashMap();
        for (int i = 0; i < statsTypeList.size(); i++) {
            statsTypeToValue.put(statsTypeList.get(i), queryResultList.get(i));
        }
        return new StatisticsTaskResult(granularityDesc, categoryDesc, statsTypeToValue);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.statistics;

import org.apache.doris.common.Config;

import java.util.List;

/**
 * A statistics task that executes a query and uses the query result as the
 * statistical value, in the same way as {@link SQLStatisticsTask}.
 *
 * <p>The only difference from {@link SQLStatisticsTask} is that the query is a
 * sampling table query.
 */
public class SampleSQLStatisticsTask extends SQLStatisticsTask {
    // Initialised from Config.default_sample_percentage; not read anywhere in this class yet.
    private float samplePercentage = Config.default_sample_percentage;

    /**
     * Creates the task by forwarding all arguments unchanged to the parent constructor.
     *
     * @param jobId           id passed through to {@code SQLStatisticsTask}
     * @param granularityDesc granularity descriptor passed through to {@code SQLStatisticsTask}
     * @param categoryDesc    category descriptor passed through to {@code SQLStatisticsTask}
     * @param statsTypeList   statistics types passed through to {@code SQLStatisticsTask}
     */
    public SampleSQLStatisticsTask(
            long jobId,
            StatsGranularityDesc granularityDesc,
            StatsCategoryDesc categoryDesc,
            List<StatsType> statsTypeList) {
        super(jobId, granularityDesc, categoryDesc, statsTypeList);
    }

    /**
     * Builds the query via the parent implementation; the table-sample part is
     * still to be added.
     */
    @Override
    protected void constructQuery() {
        // TODO
        super.constructQuery();
        // step1: construct table sample
    }
}
Loading

0 comments on commit a9da0d2

Please sign in to comment.