Skip to content

Commit

Permalink
[Feature](Export) Export sql supports to export data of view and …
Browse files Browse the repository at this point in the history
…`exrernal table` (apache#24070)

Previously, EXPORT only supported the export of the olap table,
This pr supports the export of view table and external table.
  • Loading branch information
BePPPower authored Sep 13, 2023
1 parent d7e5f97 commit 9847f77
Show file tree
Hide file tree
Showing 9 changed files with 2,064 additions and 117 deletions.
145 changes: 99 additions & 46 deletions fe/fe-core/src/main/java/org/apache/doris/load/ExportJob.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
import org.apache.doris.catalog.MaterializedIndex.IndexExtState;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Table;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.TableIf.TableType;
import org.apache.doris.common.Config;
import org.apache.doris.common.FeMetaVersion;
import org.apache.doris.common.Pair;
Expand All @@ -51,6 +52,7 @@
import org.apache.doris.common.io.Writable;
import org.apache.doris.common.util.SqlParserUtils;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.nereids.StatementContext;
import org.apache.doris.nereids.analyzer.UnboundRelation;
import org.apache.doris.nereids.analyzer.UnboundSlot;
Expand Down Expand Up @@ -199,7 +201,7 @@ public Map<String, Long> getPartitionToVersion() {

private List<String> exportColumns = Lists.newArrayList();

private Table exportTable;
private TableIf exportTable;

// when set to true, means this job instance is created by replay thread(FE restarted or master changed)
private boolean isReplayed = false;
Expand Down Expand Up @@ -242,17 +244,6 @@ public ExportJob(long jobId) {
this.id = jobId;
}

/**
* For an ExportJob:
* The ExportJob is divided into multiple 'ExportTaskExecutor'
* according to the 'parallelism' set by the user.
* The tablets which will be exported by this ExportJob are divided into 'parallelism' copies,
* and each ExportTaskExecutor is responsible for a list of tablets.
* The tablets responsible for an ExportTaskExecutor will be assigned to multiple OutfileStmt
* according to the 'TABLETS_NUM_PER_OUTFILE_IN_EXPORT'.
*
* @throws UserException
*/
public void generateOutfileStatement() throws UserException {
exportTable.readLock();
try {
Expand All @@ -264,39 +255,35 @@ public void generateOutfileStatement() throws UserException {
generateExportJobExecutor();
}

public void generateOutfileLogicalPlans(List<String> nameParts)
/**
* For an ExportJob:
* The ExportJob is divided into multiple 'ExportTaskExecutor'
* according to the 'parallelism' set by the user.
* The tablets which will be exported by this ExportJob are divided into 'parallelism' copies,
* and each ExportTaskExecutor is responsible for a list of tablets.
* The tablets responsible for an ExportTaskExecutor will be assigned to multiple OutfileStmt
* according to the 'TABLETS_NUM_PER_OUTFILE_IN_EXPORT'.
*
* @throws UserException
*/
public void generateOutfileLogicalPlans(List<String> qualifiedTableName)
throws UserException {
String catalogType = Env.getCurrentEnv().getCatalogMgr().getCatalog(this.tableName.getCtl()).getType();
exportTable.readLock();
try {
// build source columns
List<NamedExpression> selectLists = Lists.newArrayList();
if (exportColumns.isEmpty()) {
selectLists.add(new UnboundStar(ImmutableList.of()));
} else {
this.exportColumns.stream().forEach(col -> {
selectLists.add(new UnboundSlot(this.tableName.getTbl(), col));
});
}

// get all tablets
List<List<Long>> tabletsListPerParallel = splitTablets();

// Each Outfile clause responsible for MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT tablets
for (List<Long> tabletsList : tabletsListPerParallel) {
List<StatementBase> logicalPlanAdapters = Lists.newArrayList();
for (int i = 0; i < tabletsList.size(); i += MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT) {
int end = i + MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT < tabletsList.size()
? i + MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT : tabletsList.size();
List<Long> tabletIds = new ArrayList<>(tabletsList.subList(i, end));

// generate LogicalPlan
LogicalPlan plan = generateOneLogicalPlan(nameParts, tabletIds, selectLists);
// generate LogicalPlanAdapter
StatementBase statementBase = generateLogicalPlanAdapter(plan);

logicalPlanAdapters.add(statementBase);
if (InternalCatalog.INTERNAL_CATALOG_NAME.equals(catalogType)) {
if (exportTable.getType() == TableType.VIEW) {
// view table
generateViewOrExternalTableOutfile(qualifiedTableName);
} else if (exportTable.getType() == TableType.OLAP) {
// olap table
generateOlapTableOutfile(qualifiedTableName);
} else {
throw new UserException("Do not support export table type [" + exportTable.getType() + "]");
}
selectStmtListPerParallel.add(logicalPlanAdapters);
} else {
// external table
generateViewOrExternalTableOutfile(qualifiedTableName);
}

// debug LOG output
Expand All @@ -315,11 +302,77 @@ public void generateOutfileLogicalPlans(List<String> nameParts)
generateExportJobExecutor();
}

private LogicalPlan generateOneLogicalPlan(List<String> nameParts, List<Long> tabletIds,
List<NamedExpression> selectLists) {
private void generateOlapTableOutfile(List<String> qualifiedTableName) throws UserException {
// build source columns
List<NamedExpression> selectLists = Lists.newArrayList();
if (exportColumns.isEmpty()) {
selectLists.add(new UnboundStar(ImmutableList.of()));
} else {
this.exportColumns.stream().forEach(col -> {
selectLists.add(new UnboundSlot(this.tableName.getTbl(), col));
});
}

// get all tablets
List<List<Long>> tabletsListPerParallel = splitTablets();

// Each Outfile clause responsible for MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT tablets
for (List<Long> tabletsList : tabletsListPerParallel) {
List<StatementBase> logicalPlanAdapters = Lists.newArrayList();
for (int i = 0; i < tabletsList.size(); i += MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT) {
int end = i + MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT < tabletsList.size()
? i + MAXIMUM_TABLETS_OF_OUTFILE_IN_EXPORT : tabletsList.size();
List<Long> tabletIds = new ArrayList<>(tabletsList.subList(i, end));

// generate LogicalPlan
LogicalPlan plan = generateOneLogicalPlan(qualifiedTableName, tabletIds,
this.partitionNames, selectLists);
// generate LogicalPlanAdapter
StatementBase statementBase = generateLogicalPlanAdapter(plan);

logicalPlanAdapters.add(statementBase);
}
selectStmtListPerParallel.add(logicalPlanAdapters);
}
}

/**
* This method used to generate outfile sql for view table or external table.
* @throws UserException
*/
private void generateViewOrExternalTableOutfile(List<String> qualifiedTableName) {
// Because there is no division of tablets in view and external table
// we set parallelism = 1;
this.parallelism = 1;
LOG.debug("Because there is no division of tablets in view and external table, we set parallelism = 1");

// build source columns
List<NamedExpression> selectLists = Lists.newArrayList();
if (exportColumns.isEmpty()) {
selectLists.add(new UnboundStar(ImmutableList.of()));
} else {
this.exportColumns.stream().forEach(col -> {
selectLists.add(new UnboundSlot(this.tableName.getTbl(), col));
});
}

List<StatementBase> logicalPlanAdapters = Lists.newArrayList();

// generate LogicalPlan
LogicalPlan plan = generateOneLogicalPlan(qualifiedTableName, ImmutableList.of(),
ImmutableList.of(), selectLists);
// generate LogicalPlanAdapter
StatementBase statementBase = generateLogicalPlanAdapter(plan);

logicalPlanAdapters.add(statementBase);
selectStmtListPerParallel.add(logicalPlanAdapters);
}

private LogicalPlan generateOneLogicalPlan(List<String> qualifiedTableName, List<Long> tabletIds,
List<String> partitions, List<NamedExpression> selectLists) {
// UnboundRelation
LogicalPlan plan = new UnboundRelation(StatementScopeIdGenerator.newRelationId(), nameParts,
this.partitionNames, false, tabletIds, ImmutableList.of());
LogicalPlan plan = new UnboundRelation(StatementScopeIdGenerator.newRelationId(), qualifiedTableName,
partitions, false, tabletIds, ImmutableList.of());
// LogicalCheckPolicy
plan = new LogicalCheckPolicy<>(plan);
// LogicalFilter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.TableIf.TableType;
import org.apache.doris.catalog.TabletMeta;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.load.ExportFailMsg.CancelType;
Expand Down Expand Up @@ -84,47 +85,49 @@ public void execute() throws JobException {
throw new JobException("Export executor has been canceled, task id: {}", taskId);
}
// check the version of tablets
try {
Database db = Env.getCurrentEnv().getInternalCatalog().getDbOrAnalysisException(
exportJob.getTableName().getDb());
OlapTable table = db.getOlapTableOrAnalysisException(exportJob.getTableName().getTbl());
table.readLock();
if (exportJob.getExportTable().getType() == TableType.OLAP) {
try {
List<Long> tabletIds;
if (exportJob.getSessionVariables().isEnableNereidsPlanner()) {
LogicalPlanAdapter logicalPlanAdapter = (LogicalPlanAdapter) selectStmtLists.get(idx);
Optional<UnboundRelation> unboundRelation = findUnboundRelation(
logicalPlanAdapter.getLogicalPlan());
tabletIds = unboundRelation.get().getTabletIds();
} else {
SelectStmt selectStmt = (SelectStmt) selectStmtLists.get(idx);
tabletIds = selectStmt.getTableRefs().get(0).getSampleTabletIds();
}
Database db = Env.getCurrentEnv().getInternalCatalog().getDbOrAnalysisException(
exportJob.getTableName().getDb());
OlapTable table = db.getOlapTableOrAnalysisException(exportJob.getTableName().getTbl());
table.readLock();
try {
List<Long> tabletIds;
if (exportJob.getSessionVariables().isEnableNereidsPlanner()) {
LogicalPlanAdapter logicalPlanAdapter = (LogicalPlanAdapter) selectStmtLists.get(idx);
Optional<UnboundRelation> unboundRelation = findUnboundRelation(
logicalPlanAdapter.getLogicalPlan());
tabletIds = unboundRelation.get().getTabletIds();
} else {
SelectStmt selectStmt = (SelectStmt) selectStmtLists.get(idx);
tabletIds = selectStmt.getTableRefs().get(0).getSampleTabletIds();
}

for (Long tabletId : tabletIds) {
TabletMeta tabletMeta = Env.getCurrentEnv().getTabletInvertedIndex().getTabletMeta(
tabletId);
Partition partition = table.getPartition(tabletMeta.getPartitionId());
long nowVersion = partition.getVisibleVersion();
long oldVersion = exportJob.getPartitionToVersion().get(partition.getName());
if (nowVersion != oldVersion) {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
CancelType.RUN_FAIL, "The version of tablet {" + tabletId + "} has changed");
throw new JobException("Export Job[{}]: Tablet {} has changed version, old version = {}, "
+ "now version = {}", exportJob.getId(), tabletId, oldVersion, nowVersion);
for (Long tabletId : tabletIds) {
TabletMeta tabletMeta = Env.getCurrentEnv().getTabletInvertedIndex().getTabletMeta(
tabletId);
Partition partition = table.getPartition(tabletMeta.getPartitionId());
long nowVersion = partition.getVisibleVersion();
long oldVersion = exportJob.getPartitionToVersion().get(partition.getName());
if (nowVersion != oldVersion) {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
CancelType.RUN_FAIL, "The version of tablet {" + tabletId + "} has changed");
throw new JobException("Export Job[{}]: Tablet {} has changed version, old version = {}"
+ ", now version = {}", exportJob.getId(), tabletId, oldVersion, nowVersion);
}
}
} catch (Exception e) {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
ExportFailMsg.CancelType.RUN_FAIL, e.getMessage());
throw new JobException(e);
} finally {
table.readUnlock();
}
} catch (Exception e) {
} catch (AnalysisException e) {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
ExportFailMsg.CancelType.RUN_FAIL, e.getMessage());
throw new JobException(e);
} finally {
table.readUnlock();
}
} catch (AnalysisException e) {
exportJob.updateExportJobState(ExportJobState.CANCELLED, taskId, null,
ExportFailMsg.CancelType.RUN_FAIL, e.getMessage());
throw new JobException(e);
}

try (AutoCloseConnectContext r = buildConnectContext()) {
Expand Down
Loading

0 comments on commit 9847f77

Please sign in to comment.