From 6ed0ce7a8d9dd4ce4d70087ed6e1dfaf7d169b24 Mon Sep 17 00:00:00 2001
From: Manasyan Tigran
Date: Tue, 30 May 2023 17:49:15 +0700
Subject: [PATCH] [debezium] bump debezium version to 1.9.7.

---
 .../base/dialect/JdbcDataSourceDialect.java | 15 +-
 .../relational/JdbcSourceEventDispatcher.java | 13 +-
 .../source/EmbeddedFlinkDatabaseHistory.java | 6 +
 .../external/JdbcSourceFetchTaskContext.java | 3 +
 .../EmbeddedFlinkDatabaseHistory.java | 6 +
 .../base/experimental/MySqlDialect.java | 19 +
 .../fetch/MySqlScanFetchTask.java | 217 +--
 .../fetch/MySqlSourceFetchTaskContext.java | 31 +-
 .../fetch/MySqlStreamFetchTask.java | 27 +-
 .../base/experimental/utils/MySqlSchema.java | 9 +-
 .../cdc/connectors/tests/OracleE2eITCase.java | 1 +
 .../internal/DebeziumChangeFetcher.java | 2 +-
 .../internal/FlinkOffsetBackingStore.java | 1 +
 ...izedRelationalDatabaseConnectorConfig.java | 175 ++
 .../reader/fetch/MongoDBFetchTaskContext.java | 7 +-
 .../mysql/debezium/DebeziumUtils.java | 5 +-
 .../dispatcher/EventDispatcherImpl.java | 21 +
 .../debezium/reader/BinlogSplitReader.java | 1 +
 .../debezium/reader/SnapshotSplitReader.java | 3 +
 .../task/context/StatefulTaskContext.java | 23 +-
 .../connectors/mysql/schema/MySqlSchema.java | 7 +-
 .../connectors/mysql/source/MySqlSource.java | 7 +-
 .../mysql/source/assigners/ChunkSplitter.java | 4 +-
 .../source/assigners/MySqlChunkSplitter.java | 55 +-
 .../assigners/MySqlSnapshotSplitAssigner.java | 15 +-
 .../config/MySqlSourceConfigFactory.java | 9 +-
 .../source/reader/MySqlSourceReader.java | 9 +-
 .../mysql/source/utils/StatementUtils.java | 2 +-
 .../source/utils/TableDiscoveryUtils.java | 30 +-
 .../connector/mysql/MySqlConnection.java | 96 +-
 .../mysql/MySqlDefaultValueConverter.java | 451 -----
 .../MySqlStreamingChangeEventSource.java | 1469 -----------------
 .../listener/DefaultValueParserListener.java | 71 +-
 .../mysql/LegacyMySqlSourceTest.java | 4 +
 .../reader/BinlogSplitReaderTest.java | 28 +-
 .../reader/SnapshotSplitReaderTest.java | 19 +-
 .../PendingSplitsStateSerializerTest.java | 5 +
 .../source/reader/MySqlRecordEmitterTest.java | 28 +-
 .../source/reader/MySqlSourceReaderTest.java | 8 +-
 .../oceanbase/source/OceanBaseConnection.java | 6 +-
 .../source/OceanBaseTableSchema.java | 0
 flink-connector-oracle-cdc/pom.xml | 5 +
 .../cdc/connectors/oracle/OracleSource.java | 2 +
 .../reader/fetch/OracleScanFetchTask.java | 132 +-
 .../fetch/OracleSourceFetchTaskContext.java | 32 +-
 .../reader/fetch/OracleStreamFetchTask.java | 23 +-
 .../source/utils/OracleConnectionUtils.java | 10 +-
 .../oracle/source/utils/OracleUtils.java | 11 +-
 .../connector/oracle/OracleErrorHandler.java | 105 --
 .../oracle/logminer/LogMinerAdapter.java | 448 +++++
 .../LogMinerStreamingChangeEventSource.java | 1067 +++++++++---
 .../oracle/logminer/TransactionalBuffer.java | 1321 ---------------
 .../connectors/oracle/OracleSourceTest.java | 2 +
 .../oracle/source/OracleSourceITCase.java | 3 +-
 .../oracle/table/OracleConnectorITCase.java | 150 ++
 .../connector/postgresql/connection/Lsn.java | 149 ++
 .../table/PostgreSQLConnectorITCase.java | 1 -
 pom.xml | 2 +-
 58 files changed, 2300 insertions(+), 4071 deletions(-)
 create mode 100644 flink-connector-debezium/src/main/java/io/debezium/relational/HistorizedRelationalDatabaseConnectorConfig.java
 delete mode 100644 flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlDefaultValueConverter.java
 delete mode 100644 flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlStreamingChangeEventSource.java
 delete mode 100644 flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseTableSchema.java
 delete mode 100644 flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/OracleErrorHandler.java
 create mode 100644 flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerAdapter.java
 delete mode 100644 flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/TransactionalBuffer.java
 create mode 100644 flink-connector-postgres-cdc/src/main/java/io/debezium/connector/postgresql/connection/Lsn.java

diff --git a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/dialect/JdbcDataSourceDialect.java b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/dialect/JdbcDataSourceDialect.java
index c48a8045a5c..eff5a9fc250 100644
--- a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/dialect/JdbcDataSourceDialect.java
+++ b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/dialect/JdbcDataSourceDialect.java
@@ -17,11 +17,9 @@ package com.ververica.cdc.connectors.base.dialect;
 import org.apache.flink.annotation.Experimental;
-import org.apache.flink.util.FlinkRuntimeException;
 import com.ververica.cdc.connectors.base.config.JdbcSourceConfig;
 import com.ververica.cdc.connectors.base.config.SourceConfig;
-import com.ververica.cdc.connectors.base.relational.connection.JdbcConnectionFactory;
 import com.ververica.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory;
 import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitBase;
 import com.ververica.cdc.connectors.base.source.reader.external.FetchTask;
@@ -51,18 +49,7 @@ public interface JdbcDataSourceDialect extends DataSourceDialect
 */
-public class JdbcSourceEventDispatcher extends EventDispatcher {
+public class JdbcSourceEventDispatcher
extends EventDispatcher { private static final Logger LOG = LoggerFactory.getLogger(JdbcSourceEventDispatcher.class); public static final String HISTORY_RECORD_FIELD = "historyRecord"; @@ -132,7 +134,9 @@ public ChangeEventQueue getQueue() { @Override public void dispatchSchemaChangeEvent( - TableId dataCollectionId, SchemaChangeEventEmitter schemaChangeEventEmitter) + P partition, + TableId dataCollectionId, + SchemaChangeEventEmitter schemaChangeEventEmitter) throws InterruptedException { if (dataCollectionId != null && !filter.isIncluded(dataCollectionId)) { if (historizedSchema == null || historizedSchema.storeOnlyCapturedTables()) { @@ -141,6 +145,11 @@ public void dispatchSchemaChangeEvent( } } schemaChangeEventEmitter.emitSchemaChangeEvent(new SchemaChangeEventReceiver()); + IncrementalSnapshotChangeEventSource incrementalEventSource = + getIncrementalSnapshotChangeEventSource(); + if (incrementalEventSource != null) { + incrementalEventSource.processSchemaChange(partition, dataCollectionId); + } } @Override diff --git a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/EmbeddedFlinkDatabaseHistory.java b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/EmbeddedFlinkDatabaseHistory.java index df7a4321a27..c08cd5fbe58 100644 --- a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/EmbeddedFlinkDatabaseHistory.java +++ b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/EmbeddedFlinkDatabaseHistory.java @@ -107,6 +107,12 @@ public void recover( listener.recoveryStopped(); } + @Override + public void recover( + Map, Map> offsets, Tables schema, DdlParser ddlParser) { + offsets.forEach((source, position) -> recover(source, position, schema, ddlParser)); + } + @Override public void stop() { listener.stopped(); diff --git a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/reader/external/JdbcSourceFetchTaskContext.java b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/reader/external/JdbcSourceFetchTaskContext.java index 99fb648c33f..be19f50def6 100644 --- a/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/reader/external/JdbcSourceFetchTaskContext.java +++ b/flink-cdc-base/src/main/java/com/ververica/cdc/connectors/base/source/reader/external/JdbcSourceFetchTaskContext.java @@ -27,6 +27,7 @@ import io.debezium.data.Envelope; import io.debezium.pipeline.ErrorHandler; import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.spi.Partition; import io.debezium.relational.RelationalDatabaseSchema; import io.debezium.relational.Table; import io.debezium.relational.TableId; @@ -169,4 +170,6 @@ public SchemaNameAdjuster getSchemaNameAdjuster() { public abstract JdbcSourceEventDispatcher getDispatcher(); public abstract OffsetContext getOffsetContext(); + + public abstract Partition getPartition(); } diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/EmbeddedFlinkDatabaseHistory.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/EmbeddedFlinkDatabaseHistory.java index 84295fb4f64..39d85edfe2b 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/EmbeddedFlinkDatabaseHistory.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/EmbeddedFlinkDatabaseHistory.java @@ -107,6 +107,12 @@ public void recover( listener.recoveryStopped(); } + @Override + public void recover( + Map, Map> offsets, Tables schema, 
DdlParser ddlParser) { + offsets.forEach((source, position) -> recover(source, position, schema, ddlParser)); + } + @Override public void stop() { listener.stopped(); diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/MySqlDialect.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/MySqlDialect.java index ff4d4bd516a..afac6096f67 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/MySqlDialect.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/MySqlDialect.java @@ -29,12 +29,14 @@ import com.ververica.cdc.connectors.base.experimental.fetch.MySqlStreamFetchTask; import com.ververica.cdc.connectors.base.experimental.utils.MySqlSchema; import com.ververica.cdc.connectors.base.experimental.utils.TableDiscoveryUtils; +import com.ververica.cdc.connectors.base.relational.connection.JdbcConnectionFactory; import com.ververica.cdc.connectors.base.relational.connection.JdbcConnectionPoolFactory; import com.ververica.cdc.connectors.base.source.assigner.splitter.ChunkSplitter; import com.ververica.cdc.connectors.base.source.meta.offset.Offset; import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitBase; import com.ververica.cdc.connectors.base.source.reader.external.FetchTask; import io.debezium.connector.mysql.MySqlConnection; +import io.debezium.jdbc.JdbcConfiguration; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.TableId; import io.debezium.relational.history.TableChanges; @@ -54,6 +56,8 @@ @Experimental public class MySqlDialect implements JdbcDataSourceDialect { + private static final String QUOTED_CHARACTER = "`"; + private static final long serialVersionUID = 1L; private final MySqlSourceConfigFactory configFactory; private final MySqlSourceConfig sourceConfig; @@ -69,6 +73,21 @@ public String getName() { return "MySQL"; } + public JdbcConnection openJdbcConnection(JdbcSourceConfig sourceConfig) { + JdbcConnection jdbc = + new JdbcConnection( + JdbcConfiguration.adapt(sourceConfig.getDbzConfiguration()), + new JdbcConnectionFactory(sourceConfig, getPooledDataSourceFactory()), + QUOTED_CHARACTER, + QUOTED_CHARACTER); + try { + jdbc.connect(); + } catch (Exception e) { + throw new FlinkRuntimeException(e); + } + return jdbc; + } + @Override public Offset displayCurrentOffset(JdbcSourceConfig sourceConfig) { try (JdbcConnection jdbcConnection = openJdbcConnection(sourceConfig)) { diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlScanFetchTask.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlScanFetchTask.java index 2564a258a29..256aaeb7a60 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlScanFetchTask.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlScanFetchTask.java @@ -30,16 +30,14 @@ import io.debezium.connector.mysql.MySqlConnectorConfig; import io.debezium.connector.mysql.MySqlDatabaseSchema; import io.debezium.connector.mysql.MySqlOffsetContext; -import io.debezium.connector.mysql.MySqlValueConverters; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.heartbeat.Heartbeat; import io.debezium.pipeline.EventDispatcher; import io.debezium.pipeline.source.AbstractSnapshotChangeEventSource; import io.debezium.pipeline.source.spi.ChangeEventSource; import 
io.debezium.pipeline.source.spi.SnapshotProgressListener; import io.debezium.pipeline.spi.ChangeRecordEmitter; -import io.debezium.pipeline.spi.OffsetContext; import io.debezium.pipeline.spi.SnapshotResult; -import io.debezium.relational.Column; import io.debezium.relational.RelationalSnapshotChangeEventSource; import io.debezium.relational.SnapshotChangeRecordEmitter; import io.debezium.relational.Table; @@ -52,15 +50,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.UnsupportedEncodingException; -import java.sql.Blob; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Types; import java.time.Duration; import java.util.ArrayList; -import java.util.Calendar; import java.util.Map; import static com.ververica.cdc.connectors.base.experimental.utils.MySqlConnectionUtils.createMySqlConnection; @@ -105,9 +99,11 @@ public void execute(Context context) throws Exception { split); SnapshotSplitChangeEventSourceContext changeEventSourceContext = new SnapshotSplitChangeEventSourceContext(); - SnapshotResult snapshotResult = + SnapshotResult snapshotResult = snapshotSplitReadTask.execute( - changeEventSourceContext, sourceFetchContext.getOffsetContext()); + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); final StreamSplit backfillBinlogSplit = createBackfillBinlogSplit(changeEventSourceContext); // optimization that skip the binlog read when the low watermark equals high @@ -119,7 +115,7 @@ public void execute(Context context) throws Exception { if (!binlogBackfillRequired) { dispatchBinlogEndEvent( backfillBinlogSplit, - ((MySqlSourceFetchTaskContext) context).getOffsetContext().getPartition(), + sourceFetchContext.getPartition().getSourcePartition(), ((MySqlSourceFetchTaskContext) context).getDispatcher()); taskRunning = false; return; @@ -130,6 +126,7 @@ public void execute(Context context) throws Exception { createBackfillBinlogReadTask(backfillBinlogSplit, sourceFetchContext); backfillBinlogReadTask.execute( new SnapshotBinlogSplitChangeEventSourceContext(), + sourceFetchContext.getPartition(), sourceFetchContext.getOffsetContext()); } else { taskRunning = false; @@ -151,11 +148,6 @@ private StreamSplit createBackfillBinlogSplit( private MySqlBinlogSplitReadTask createBackfillBinlogReadTask( StreamSplit backfillBinlogSplit, MySqlSourceFetchTaskContext context) { - final MySqlOffsetContext.Loader loader = - new MySqlOffsetContext.Loader(context.getSourceConfig().getDbzConnectorConfig()); - final MySqlOffsetContext mySqlOffsetContext = - (MySqlOffsetContext) - loader.load(backfillBinlogSplit.getStartingOffset().getOffset()); // we should only capture events for the current table, // otherwise, we may can't find corresponding schema Configuration dezConf = @@ -169,7 +161,6 @@ private MySqlBinlogSplitReadTask createBackfillBinlogReadTask( // task to read binlog and backfill for current split return new MySqlBinlogSplitReadTask( new MySqlConnectorConfig(dezConf), - mySqlOffsetContext, createMySqlConnection(context.getSourceConfig().getDbzConfiguration()), context.getDispatcher(), context.getErrorHandler(), @@ -181,7 +172,7 @@ private MySqlBinlogSplitReadTask createBackfillBinlogReadTask( private void dispatchBinlogEndEvent( StreamSplit backFillBinlogSplit, Map sourcePartition, - JdbcSourceEventDispatcher eventDispatcher) + JdbcSourceEventDispatcher eventDispatcher) throws InterruptedException { eventDispatcher.dispatchWatermarkEvent( sourcePartition, @@ -191,7 
+182,8 @@ private void dispatchBinlogEndEvent( } /** A wrapped task to fetch snapshot split of table. */ - public static class MySqlSnapshotSplitReadTask extends AbstractSnapshotChangeEventSource { + public static class MySqlSnapshotSplitReadTask + extends AbstractSnapshotChangeEventSource { private static final Logger LOG = LoggerFactory.getLogger(MySqlSnapshotSplitReadTask.class); @@ -201,19 +193,19 @@ public static class MySqlSnapshotSplitReadTask extends AbstractSnapshotChangeEve private final MySqlConnectorConfig connectorConfig; private final MySqlDatabaseSchema databaseSchema; private final MySqlConnection jdbcConnection; - private final JdbcSourceEventDispatcher dispatcher; + private final JdbcSourceEventDispatcher dispatcher; private final Clock clock; private final SnapshotSplit snapshotSplit; private final MySqlOffsetContext offsetContext; - private final SnapshotProgressListener snapshotProgressListener; + private final SnapshotProgressListener snapshotProgressListener; public MySqlSnapshotSplitReadTask( MySqlConnectorConfig connectorConfig, MySqlOffsetContext previousOffset, - SnapshotProgressListener snapshotProgressListener, + SnapshotProgressListener snapshotProgressListener, MySqlDatabaseSchema databaseSchema, MySqlConnection jdbcConnection, - JdbcSourceEventDispatcher dispatcher, + JdbcSourceEventDispatcher dispatcher, SnapshotSplit snapshotSplit) { super(connectorConfig, snapshotProgressListener); this.offsetContext = previousOffset; @@ -227,13 +219,15 @@ public MySqlSnapshotSplitReadTask( } @Override - public SnapshotResult execute( - ChangeEventSourceContext context, OffsetContext previousOffset) + public SnapshotResult execute( + ChangeEventSourceContext context, + MySqlPartition partition, + MySqlOffsetContext previousOffset) throws InterruptedException { - SnapshottingTask snapshottingTask = getSnapshottingTask(previousOffset); - final SnapshotContext ctx; + SnapshottingTask snapshottingTask = getSnapshottingTask(partition, previousOffset); + final MySqlSnapshotContext ctx; try { - ctx = prepare(context); + ctx = prepare(partition); } catch (Exception e) { LOG.error("Failed to initialize snapshot context.", e); throw new RuntimeException(e); @@ -249,14 +243,13 @@ public SnapshotResult execute( } @Override - protected SnapshotResult doExecute( + protected SnapshotResult doExecute( ChangeEventSourceContext context, - OffsetContext previousOffset, + MySqlOffsetContext previousOffset, SnapshotContext snapshotContext, SnapshottingTask snapshottingTask) throws Exception { - final RelationalSnapshotChangeEventSource.RelationalSnapshotContext ctx = - (RelationalSnapshotChangeEventSource.RelationalSnapshotContext) snapshotContext; + final MySqlSnapshotContext ctx = (MySqlSnapshotContext) snapshotContext; ctx.offset = offsetContext; final BinlogOffset lowWatermark = currentBinlogOffset(jdbcConnection); @@ -266,7 +259,10 @@ protected SnapshotResult doExecute( snapshotSplit); ((SnapshotSplitChangeEventSourceContext) (context)).setLowWatermark(lowWatermark); dispatcher.dispatchWatermarkEvent( - offsetContext.getPartition(), snapshotSplit, lowWatermark, WatermarkKind.LOW); + snapshotContext.partition.getSourcePartition(), + snapshotSplit, + lowWatermark, + WatermarkKind.LOW); LOG.info("Snapshot step 2 - Snapshotting data"); createDataEvents(ctx, snapshotSplit.getTableId()); @@ -278,34 +274,36 @@ protected SnapshotResult doExecute( snapshotSplit); ((SnapshotSplitChangeEventSourceContext) (context)).setHighWatermark(lowWatermark); dispatcher.dispatchWatermarkEvent( - 
offsetContext.getPartition(), snapshotSplit, highWatermark, WatermarkKind.HIGH); + snapshotContext.partition.getSourcePartition(), + snapshotSplit, + highWatermark, + WatermarkKind.HIGH); return SnapshotResult.completed(ctx.offset); } @Override - protected SnapshottingTask getSnapshottingTask(OffsetContext previousOffset) { + protected SnapshottingTask getSnapshottingTask( + MySqlPartition partition, MySqlOffsetContext previousOffset) { return new SnapshottingTask(false, true); } @Override - protected SnapshotContext prepare(ChangeEventSourceContext changeEventSourceContext) - throws Exception { - return new MySqlSnapshotContext(); + protected MySqlSnapshotContext prepare(MySqlPartition partition) throws Exception { + return new MySqlSnapshotContext(partition); } private static class MySqlSnapshotContext - extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext { + extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext< + MySqlPartition, MySqlOffsetContext> { - public MySqlSnapshotContext() throws SQLException { - super(""); + public MySqlSnapshotContext(MySqlPartition partition) throws SQLException { + super(partition, ""); } } - private void createDataEvents( - RelationalSnapshotChangeEventSource.RelationalSnapshotContext snapshotContext, - TableId tableId) + private void createDataEvents(MySqlSnapshotContext snapshotContext, TableId tableId) throws Exception { - EventDispatcher.SnapshotReceiver snapshotReceiver = + EventDispatcher.SnapshotReceiver snapshotReceiver = dispatcher.getSnapshotChangeEventReceiver(); LOG.debug("Snapshotting table {}", tableId); createDataEventsForTable( @@ -315,8 +313,8 @@ private void createDataEvents( /** Dispatches the data change events for the records of a single table. */ private void createDataEventsForTable( - RelationalSnapshotChangeEventSource.RelationalSnapshotContext snapshotContext, - EventDispatcher.SnapshotReceiver snapshotReceiver, + MySqlSnapshotContext snapshotContext, + EventDispatcher.SnapshotReceiver snapshotReceiver, Table table) throws InterruptedException { @@ -356,12 +354,8 @@ private void createDataEventsForTable( while (rs.next()) { rows++; - final Object[] row = new Object[columnArray.getGreatestColumnPosition()]; - for (int i = 0; i < columnArray.getColumns().length; i++) { - Column actualColumn = table.columns().get(i); - row[columnArray.getColumns()[i].position() - 1] = - readField(rs, i + 1, actualColumn, table); - } + final Object[] row = + jdbcConnection.rowToArray(table, databaseSchema, rs, columnArray); if (logTimer.expired()) { long stop = clock.currentTimeInMillis(); LOG.info( @@ -369,10 +363,12 @@ private void createDataEventsForTable( rows, snapshotSplit.splitId(), Strings.duration(stop - exportStart)); - snapshotProgressListener.rowsScanned(table.id(), rows); + snapshotProgressListener.rowsScanned( + snapshotContext.partition, table.id(), rows); logTimer = getTableScanLogTimer(); } dispatcher.dispatchSnapshotEvent( + snapshotContext.partition, table.id(), getChangeRecordEmitter(snapshotContext, table.id(), row), snapshotReceiver); @@ -387,128 +383,23 @@ private void createDataEventsForTable( } } - protected ChangeRecordEmitter getChangeRecordEmitter( - SnapshotContext snapshotContext, TableId tableId, Object[] row) { + protected ChangeRecordEmitter getChangeRecordEmitter( + MySqlSnapshotContext snapshotContext, TableId tableId, Object[] row) { snapshotContext.offset.event(tableId, clock.currentTime()); - return new SnapshotChangeRecordEmitter(snapshotContext.offset, row, clock); + return new 
SnapshotChangeRecordEmitter<>( + snapshotContext.partition, snapshotContext.offset, row, clock); } private Threads.Timer getTableScanLogTimer() { return Threads.timer(clock, LOG_INTERVAL); } - - /** - * Read JDBC return value and deal special type like time, timestamp. - * - *

Note https://issues.redhat.com/browse/DBZ-3238 has fixed this issue, please remove - * this method once we bump Debezium version to 1.6 - */ - private Object readField(ResultSet rs, int fieldNo, Column actualColumn, Table actualTable) - throws SQLException { - if (actualColumn.jdbcType() == Types.TIME) { - return readTimeField(rs, fieldNo); - } else if (actualColumn.jdbcType() == Types.DATE) { - return readDateField(rs, fieldNo, actualColumn, actualTable); - } - // This is for DATETIME columns (a logical date + time without time zone) - // by reading them with a calendar based on the default time zone, we make sure that the - // value - // is constructed correctly using the database's (or connection's) time zone - else if (actualColumn.jdbcType() == Types.TIMESTAMP) { - return readTimestampField(rs, fieldNo, actualColumn, actualTable); - } - // JDBC's rs.GetObject() will return a Boolean for all TINYINT(1) columns. - // TINYINT columns are reprtoed as SMALLINT by JDBC driver - else if (actualColumn.jdbcType() == Types.TINYINT - || actualColumn.jdbcType() == Types.SMALLINT) { - // It seems that rs.wasNull() returns false when default value is set and NULL is - // inserted - // We thus need to use getObject() to identify if the value was provided and if yes - // then - // read it again to get correct scale - return rs.getObject(fieldNo) == null ? null : rs.getInt(fieldNo); - } - // DBZ-2673 - // It is necessary to check the type names as types like ENUM and SET are - // also reported as JDBC type char - else if ("CHAR".equals(actualColumn.typeName()) - || "VARCHAR".equals(actualColumn.typeName()) - || "TEXT".equals(actualColumn.typeName())) { - return rs.getBytes(fieldNo); - } else { - return rs.getObject(fieldNo); - } - } - - /** - * As MySQL connector/J implementation is broken for MySQL type "TIME" we have to use a - * binary-ish workaround. https://issues.jboss.org/browse/DBZ-342 - */ - private Object readTimeField(ResultSet rs, int fieldNo) throws SQLException { - Blob b = rs.getBlob(fieldNo); - if (b == null) { - return null; // Don't continue parsing time field if it is null - } - - try { - return MySqlValueConverters.stringToDuration( - new String(b.getBytes(1, (int) (b.length())), "UTF-8")); - } catch (UnsupportedEncodingException e) { - LOG.error("Could not read MySQL TIME value as UTF-8"); - throw new RuntimeException(e); - } - } - - /** - * In non-string mode the date field can contain zero in any of the date part which we need - * to handle as all-zero. - */ - private Object readDateField(ResultSet rs, int fieldNo, Column column, Table table) - throws SQLException { - Blob b = rs.getBlob(fieldNo); - if (b == null) { - return null; // Don't continue parsing date field if it is null - } - - try { - return MySqlValueConverters.stringToLocalDate( - new String(b.getBytes(1, (int) (b.length())), "UTF-8"), column, table); - } catch (UnsupportedEncodingException e) { - LOG.error("Could not read MySQL TIME value as UTF-8"); - throw new RuntimeException(e); - } - } - - /** - * In non-string mode the time field can contain zero in any of the date part which we need - * to handle as all-zero. - */ - private Object readTimestampField(ResultSet rs, int fieldNo, Column column, Table table) - throws SQLException { - Blob b = rs.getBlob(fieldNo); - if (b == null) { - return null; // Don't continue parsing timestamp field if it is null - } - - try { - return MySqlValueConverters.containsZeroValuesInDatePart( - (new String(b.getBytes(1, (int) (b.length())), "UTF-8")), - column, - table) - ? 
null - : rs.getTimestamp(fieldNo, Calendar.getInstance()); - } catch (UnsupportedEncodingException e) { - LOG.error("Could not read MySQL TIME value as UTF-8"); - throw new RuntimeException(e); - } - } } /** * {@link ChangeEventSource.ChangeEventSourceContext} implementation that keeps low/high * watermark for each {@link SnapshotSplit}. */ - public class SnapshotSplitChangeEventSourceContext + public static class SnapshotSplitChangeEventSourceContext implements ChangeEventSource.ChangeEventSourceContext { private BinlogOffset lowWatermark; diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlSourceFetchTaskContext.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlSourceFetchTaskContext.java index 49b4a33c4a8..42c14eb0059 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlSourceFetchTaskContext.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlSourceFetchTaskContext.java @@ -37,6 +37,7 @@ import io.debezium.connector.mysql.MySqlDatabaseSchema; import io.debezium.connector.mysql.MySqlErrorHandler; import io.debezium.connector.mysql.MySqlOffsetContext; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.connector.mysql.MySqlStreamingChangeEventSourceMetrics; import io.debezium.connector.mysql.MySqlTaskContext; import io.debezium.connector.mysql.MySqlTopicSelector; @@ -46,6 +47,7 @@ import io.debezium.pipeline.metrics.SnapshotChangeEventSourceMetrics; import io.debezium.pipeline.source.spi.EventMetadataProvider; import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.spi.Offsets; import io.debezium.relational.Table; import io.debezium.relational.TableId; import io.debezium.relational.Tables; @@ -75,10 +77,11 @@ public class MySqlSourceFetchTaskContext extends JdbcSourceFetchTaskContext { private MySqlDatabaseSchema databaseSchema; private MySqlTaskContextImpl taskContext; private MySqlOffsetContext offsetContext; - private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; + private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; private MySqlStreamingChangeEventSourceMetrics streamingChangeEventSourceMetrics; private TopicSelector topicSelector; - private JdbcSourceEventDispatcher dispatcher; + private JdbcSourceEventDispatcher dispatcher; + private MySqlPartition mySqlPartition; private ChangeEventQueue queue; private MySqlErrorHandler errorHandler; @@ -109,6 +112,8 @@ public void configure(SourceSplitBase sourceSplitBase) { this.offsetContext = loadStartingOffsetState( new MySqlOffsetContext.Loader(connectorConfig), sourceSplitBase); + this.mySqlPartition = new MySqlPartition(connectorConfig.getLogicalName()); + validateAndLoadDatabaseHistory(offsetContext, databaseSchema); this.taskContext = @@ -131,7 +136,7 @@ public void configure(SourceSplitBase sourceSplitBase) { // .buffering() .build(); this.dispatcher = - new JdbcSourceEventDispatcher( + new JdbcSourceEventDispatcher<>( connectorConfig, topicSelector, databaseSchema, @@ -152,7 +157,7 @@ public void configure(SourceSplitBase sourceSplitBase) { (MySqlStreamingChangeEventSourceMetrics) changeEventSourceMetricsFactory.getStreamingMetrics( taskContext, queue, metadataProvider); - this.errorHandler = new MySqlErrorHandler(connectorConfig.getLogicalName(), queue); + this.errorHandler = new MySqlErrorHandler(connectorConfig, queue); } @Override @@ -168,6 +173,11 @@ public 
BinaryLogClient getBinaryLogClient() { return binaryLogClient; } + @Override + public MySqlPartition getPartition() { + return mySqlPartition; + } + public MySqlTaskContextImpl getTaskContext() { return taskContext; } @@ -182,7 +192,7 @@ public MySqlOffsetContext getOffsetContext() { return offsetContext; } - public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { + public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { return snapshotChangeEventSourceMetrics; } @@ -206,7 +216,7 @@ public RowType getSplitType(Table table) { } @Override - public JdbcSourceEventDispatcher getDispatcher() { + public JdbcSourceEventDispatcher getDispatcher() { return dispatcher; } @@ -227,14 +237,13 @@ public Offset getStreamOffset(SourceRecord sourceRecord) { /** Loads the connector's persistent offset (if present) via the given loader. */ private MySqlOffsetContext loadStartingOffsetState( - OffsetContext.Loader loader, SourceSplitBase mySqlSplit) { + OffsetContext.Loader loader, SourceSplitBase mySqlSplit) { Offset offset = mySqlSplit.isSnapshotSplit() ? BinlogOffset.INITIAL_OFFSET : mySqlSplit.asStreamSplit().getStartingOffset(); - MySqlOffsetContext mySqlOffsetContext = - (MySqlOffsetContext) loader.load(offset.getOffset()); + MySqlOffsetContext mySqlOffsetContext = loader.load(offset.getOffset()); if (!isBinlogAvailable(mySqlOffsetContext)) { throw new IllegalStateException( @@ -274,11 +283,11 @@ private boolean isBinlogAvailable(MySqlOffsetContext offset) { private void validateAndLoadDatabaseHistory( MySqlOffsetContext offset, MySqlDatabaseSchema schema) { schema.initializeStorage(); - schema.recover(offset); + schema.recover(Offsets.of(mySqlPartition, offset)); } /** A subclass implementation of {@link MySqlTaskContext} which reuses one BinaryLogClient. 
*/ - public class MySqlTaskContextImpl extends MySqlTaskContext { + public static class MySqlTaskContextImpl extends MySqlTaskContext { private final BinaryLogClient reusedBinaryLogClient; diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlStreamFetchTask.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlStreamFetchTask.java index e3bb780a74a..cdc46424bac 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlStreamFetchTask.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/fetch/MySqlStreamFetchTask.java @@ -27,6 +27,7 @@ import io.debezium.connector.mysql.MySqlConnection; import io.debezium.connector.mysql.MySqlConnectorConfig; import io.debezium.connector.mysql.MySqlOffsetContext; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.connector.mysql.MySqlStreamingChangeEventSource; import io.debezium.connector.mysql.MySqlStreamingChangeEventSourceMetrics; import io.debezium.connector.mysql.MySqlTaskContext; @@ -59,7 +60,6 @@ public void execute(Context context) throws Exception { binlogSplitReadTask = new MySqlBinlogSplitReadTask( sourceFetchContext.getDbzConnectorConfig(), - sourceFetchContext.getOffsetContext(), sourceFetchContext.getConnection(), sourceFetchContext.getDispatcher(), sourceFetchContext.getErrorHandler(), @@ -69,7 +69,9 @@ public void execute(Context context) throws Exception { BinlogSplitChangeEventSourceContext changeEventSourceContext = new BinlogSplitChangeEventSourceContext(); binlogSplitReadTask.execute( - changeEventSourceContext, sourceFetchContext.getOffsetContext()); + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); } @Override @@ -90,16 +92,14 @@ public static class MySqlBinlogSplitReadTask extends MySqlStreamingChangeEventSo private static final Logger LOG = LoggerFactory.getLogger(MySqlBinlogSplitReadTask.class); private final StreamSplit binlogSplit; - private final MySqlOffsetContext offsetContext; - private final JdbcSourceEventDispatcher dispatcher; + private final JdbcSourceEventDispatcher dispatcher; private final ErrorHandler errorHandler; private ChangeEventSourceContext context; public MySqlBinlogSplitReadTask( MySqlConnectorConfig connectorConfig, - MySqlOffsetContext offsetContext, MySqlConnection connection, - JdbcSourceEventDispatcher dispatcher, + JdbcSourceEventDispatcher dispatcher, ErrorHandler errorHandler, MySqlTaskContext taskContext, MySqlStreamingChangeEventSourceMetrics metrics, @@ -114,20 +114,23 @@ public MySqlBinlogSplitReadTask( metrics); this.binlogSplit = binlogSplit; this.dispatcher = dispatcher; - this.offsetContext = offsetContext; this.errorHandler = errorHandler; } @Override - public void execute(ChangeEventSourceContext context, MySqlOffsetContext offsetContext) + public void execute( + ChangeEventSourceContext context, + MySqlPartition partition, + MySqlOffsetContext offsetContext) throws InterruptedException { this.context = context; - super.execute(context, offsetContext); + super.execute(context, partition, offsetContext); } @Override - protected void handleEvent(MySqlOffsetContext offsetContext, Event event) { - super.handleEvent(offsetContext, event); + protected void handleEvent( + MySqlPartition partition, MySqlOffsetContext offsetContext, Event event) { + super.handleEvent(partition, offsetContext, event); // check do we need to stop for fetch binlog for snapshot split. 
if (isBoundedRead()) { final BinlogOffset currentBinlogOffset = @@ -137,7 +140,7 @@ protected void handleEvent(MySqlOffsetContext offsetContext, Event event) { // send binlog end event try { dispatcher.dispatchWatermarkEvent( - offsetContext.getPartition(), + partition.getSourcePartition(), binlogSplit, currentBinlogOffset, WatermarkKind.END); diff --git a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/utils/MySqlSchema.java b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/utils/MySqlSchema.java index abe69e45607..5ee98e98bd1 100644 --- a/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/utils/MySqlSchema.java +++ b/flink-cdc-base/src/test/java/com/ververica/cdc/connectors/base/experimental/utils/MySqlSchema.java @@ -22,6 +22,7 @@ import io.debezium.connector.mysql.MySqlConnectorConfig; import io.debezium.connector.mysql.MySqlDatabaseSchema; import io.debezium.connector.mysql.MySqlOffsetContext; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.TableId; import io.debezium.relational.history.TableChanges; @@ -76,9 +77,15 @@ private TableChanges.TableChange readTableSchema(JdbcConnection jdbc, TableId ta final String ddl = rs.getString(2); final MySqlOffsetContext offsetContext = MySqlOffsetContext.initial(connectorConfig); + final MySqlPartition partition = + new MySqlPartition(connectorConfig.getLogicalName()); List schemaChangeEvents = databaseSchema.parseSnapshotDdl( - ddl, tableId.catalog(), offsetContext, Instant.now()); + partition, + ddl, + tableId.catalog(), + offsetContext, + Instant.now()); for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) { for (TableChanges.TableChange tableChange : schemaChangeEvent.getTableChanges()) { diff --git a/flink-cdc-e2e-tests/src/test/java/com/ververica/cdc/connectors/tests/OracleE2eITCase.java b/flink-cdc-e2e-tests/src/test/java/com/ververica/cdc/connectors/tests/OracleE2eITCase.java index 51b8d054fa4..d91e388ba0b 100644 --- a/flink-cdc-e2e-tests/src/test/java/com/ververica/cdc/connectors/tests/OracleE2eITCase.java +++ b/flink-cdc-e2e-tests/src/test/java/com/ververica/cdc/connectors/tests/OracleE2eITCase.java @@ -81,6 +81,7 @@ public void after() { public void testOracleCDC() throws Exception { List sqlLines = Arrays.asList( + "SET 'execution.checkpointing.interval' = '3s';", "CREATE TABLE products_source (", " ID INT NOT NULL,", " NAME STRING,", diff --git a/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/DebeziumChangeFetcher.java b/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/DebeziumChangeFetcher.java index dd5cef4d569..24366c806b1 100644 --- a/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/DebeziumChangeFetcher.java +++ b/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/DebeziumChangeFetcher.java @@ -235,7 +235,7 @@ private void handleBatch(List> changeEve deserialization.deserialize(record, debeziumCollector); - if (!isSnapshotRecord(record)) { + if (isInDbSnapshotPhase && !isSnapshotRecord(record)) { LOG.debug("Snapshot phase finishes."); isInDbSnapshotPhase = false; } diff --git a/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/FlinkOffsetBackingStore.java b/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/FlinkOffsetBackingStore.java index 2c5d1fc0a5f..8147074eef7 100644 --- 
a/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/FlinkOffsetBackingStore.java +++ b/flink-connector-debezium/src/main/java/com/ververica/cdc/debezium/internal/FlinkOffsetBackingStore.java @@ -63,6 +63,7 @@ public class FlinkOffsetBackingStore implements OffsetBackingStore { protected Map data = new HashMap<>(); protected ExecutorService executor; + @SuppressWarnings("unchecked") @Override public void configure(WorkerConfig config) { // eagerly initialize the executor, because OffsetStorageWriter will use it later diff --git a/flink-connector-debezium/src/main/java/io/debezium/relational/HistorizedRelationalDatabaseConnectorConfig.java b/flink-connector-debezium/src/main/java/io/debezium/relational/HistorizedRelationalDatabaseConnectorConfig.java new file mode 100644 index 00000000000..98d25b74f11 --- /dev/null +++ b/flink-connector-debezium/src/main/java/io/debezium/relational/HistorizedRelationalDatabaseConnectorConfig.java @@ -0,0 +1,175 @@ +/* + * Copyright Debezium Authors. + * + * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.debezium.relational; + +import io.debezium.config.ConfigDefinition; +import io.debezium.config.Configuration; +import io.debezium.config.Field; +import io.debezium.relational.Selectors.TableIdToStringMapper; +import io.debezium.relational.Tables.TableFilter; +import io.debezium.relational.history.DatabaseHistory; +import io.debezium.relational.history.DatabaseHistoryListener; +import io.debezium.relational.history.DatabaseHistoryMetrics; +import io.debezium.relational.history.HistoryRecordComparator; +import io.debezium.relational.history.KafkaDatabaseHistory; +import org.apache.kafka.common.config.ConfigDef.Importance; +import org.apache.kafka.common.config.ConfigDef.Type; +import org.apache.kafka.common.config.ConfigDef.Width; +import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.source.SourceConnector; + +/** + * Copied from Debezium project. Configuration options shared across the relational CDC connectors + * which use a persistent database schema history. + * + *

Added JMX_METRICS_ENABLED option. + */ +public abstract class HistorizedRelationalDatabaseConnectorConfig + extends RelationalDatabaseConnectorConfig { + + protected static final int DEFAULT_SNAPSHOT_FETCH_SIZE = 2_000; + + private boolean useCatalogBeforeSchema; + private final String logicalName; + private final Class connectorClass; + private final boolean multiPartitionMode; + + /** + * The database history class is hidden in the {@link #configDef()} since that is designed to + * work with a user interface, and in these situations using Kafka is the only way to go. + */ + public static final Field DATABASE_HISTORY = + Field.create("database.history") + .withDisplayName("Database history class") + .withType(Type.CLASS) + .withWidth(Width.LONG) + .withImportance(Importance.LOW) + .withInvisibleRecommender() + .withDescription( + "The name of the DatabaseHistory class that should be used to store and recover database schema changes. " + + "The configuration properties for the history are prefixed with the '" + + DatabaseHistory.CONFIGURATION_FIELD_PREFIX_STRING + + "' string.") + .withDefault(KafkaDatabaseHistory.class.getName()); + + public static final Field JMX_METRICS_ENABLED = + Field.create(DatabaseHistory.CONFIGURATION_FIELD_PREFIX_STRING + "metrics.enabled") + .withDisplayName("Skip DDL statements that cannot be parsed") + .withType(Type.BOOLEAN) + .withImportance(Importance.LOW) + .withDescription("Whether to enable JMX history metrics") + .withDefault(false); + + protected static final ConfigDefinition CONFIG_DEFINITION = + RelationalDatabaseConnectorConfig.CONFIG_DEFINITION + .edit() + .history( + DATABASE_HISTORY, + DatabaseHistory.SKIP_UNPARSEABLE_DDL_STATEMENTS, + DatabaseHistory.STORE_ONLY_MONITORED_TABLES_DDL, + DatabaseHistory.STORE_ONLY_CAPTURED_TABLES_DDL, + KafkaDatabaseHistory.BOOTSTRAP_SERVERS, + KafkaDatabaseHistory.TOPIC, + KafkaDatabaseHistory.RECOVERY_POLL_ATTEMPTS, + KafkaDatabaseHistory.RECOVERY_POLL_INTERVAL_MS, + KafkaDatabaseHistory.KAFKA_QUERY_TIMEOUT_MS) + .create(); + + protected HistorizedRelationalDatabaseConnectorConfig( + Class connectorClass, + Configuration config, + String logicalName, + TableFilter systemTablesFilter, + boolean useCatalogBeforeSchema, + int defaultSnapshotFetchSize, + ColumnFilterMode columnFilterMode, + boolean multiPartitionMode) { + super( + config, + logicalName, + systemTablesFilter, + TableId::toString, + defaultSnapshotFetchSize, + columnFilterMode); + this.useCatalogBeforeSchema = useCatalogBeforeSchema; + this.logicalName = logicalName; + this.connectorClass = connectorClass; + this.multiPartitionMode = multiPartitionMode; + } + + protected HistorizedRelationalDatabaseConnectorConfig( + Class connectorClass, + Configuration config, + String logicalName, + TableFilter systemTablesFilter, + TableIdToStringMapper tableIdMapper, + boolean useCatalogBeforeSchema, + ColumnFilterMode columnFilterMode, + boolean multiPartitionMode) { + super( + config, + logicalName, + systemTablesFilter, + tableIdMapper, + DEFAULT_SNAPSHOT_FETCH_SIZE, + columnFilterMode); + this.useCatalogBeforeSchema = useCatalogBeforeSchema; + this.logicalName = logicalName; + this.connectorClass = connectorClass; + this.multiPartitionMode = multiPartitionMode; + } + + /** Returns a configured (but not yet started) instance of the database history. 
*/ + public DatabaseHistory getDatabaseHistory() { + Configuration config = getConfig(); + + DatabaseHistory databaseHistory = + config.getInstance( + HistorizedRelationalDatabaseConnectorConfig.DATABASE_HISTORY, + DatabaseHistory.class); + if (databaseHistory == null) { + throw new ConnectException( + "Unable to instantiate the database history class " + + config.getString( + HistorizedRelationalDatabaseConnectorConfig.DATABASE_HISTORY)); + } + + // Do not remove the prefix from the subset of config properties ... + Configuration dbHistoryConfig = + config.subset(DatabaseHistory.CONFIGURATION_FIELD_PREFIX_STRING, false) + .edit() + .withDefault(DatabaseHistory.NAME, getLogicalName() + "-dbhistory") + .withDefault( + KafkaDatabaseHistory.INTERNAL_CONNECTOR_CLASS, + connectorClass.getName()) + .withDefault(KafkaDatabaseHistory.INTERNAL_CONNECTOR_ID, logicalName) + .build(); + + DatabaseHistoryListener listener = + config.getBoolean(JMX_METRICS_ENABLED) + ? new DatabaseHistoryMetrics(this, multiPartitionMode) + : DatabaseHistoryListener.NOOP; + + HistoryRecordComparator historyComparator = getHistoryRecordComparator(); + databaseHistory.configure( + dbHistoryConfig, historyComparator, listener, useCatalogBeforeSchema); // validates + + return databaseHistory; + } + + public boolean useCatalogBeforeSchema() { + return useCatalogBeforeSchema; + } + + /** + * Returns a comparator to be used when recovering records from the schema history, making sure + * no history entries newer than the offset we resume from are recovered (which could happen + * when restarting a connector after history records have been persisted but no new offset has + * been committed yet). + */ + protected abstract HistoryRecordComparator getHistoryRecordComparator(); +} diff --git a/flink-connector-mongodb-cdc/src/main/java/com/ververica/cdc/connectors/mongodb/source/reader/fetch/MongoDBFetchTaskContext.java b/flink-connector-mongodb-cdc/src/main/java/com/ververica/cdc/connectors/mongodb/source/reader/fetch/MongoDBFetchTaskContext.java index c365808a064..c8175e34779 100644 --- a/flink-connector-mongodb-cdc/src/main/java/com/ververica/cdc/connectors/mongodb/source/reader/fetch/MongoDBFetchTaskContext.java +++ b/flink-connector-mongodb-cdc/src/main/java/com/ververica/cdc/connectors/mongodb/source/reader/fetch/MongoDBFetchTaskContext.java @@ -65,8 +65,11 @@ public MongoDBFetchTaskContext( } public void configure(SourceSplitBase sourceSplitBase) { - final int queueSize = - sourceSplitBase.isSnapshotSplit() ? Integer.MAX_VALUE : sourceConfig.getBatchSize(); + // we need to use small batch size instead of INT.MAX as earlier because + // now under the hood of debezium the ArrayDequeue was used as queue implementation + // TODO: replace getBatchSize with getSnapshotBatchSize + // when SNAPSHOT_BATCH_SIZE option will be added + final int queueSize = sourceConfig.getBatchSize(); this.changeEventQueue = new ChangeEventQueue.Builder() diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/DebeziumUtils.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/DebeziumUtils.java index 7f8f5c29ddf..0782ef26629 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/DebeziumUtils.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/DebeziumUtils.java @@ -49,6 +49,7 @@ /** Utilities related to Debezium. 
*/ public class DebeziumUtils { + private static final String QUOTED_CHARACTER = "`"; private static final Logger LOG = LoggerFactory.getLogger(DebeziumUtils.class); @@ -58,8 +59,8 @@ public static JdbcConnection openJdbcConnection(MySqlSourceConfig sourceConfig) new JdbcConnection( JdbcConfiguration.adapt(sourceConfig.getDbzConfiguration()), new JdbcConnectionFactory(sourceConfig), - "`", - "`"); + QUOTED_CHARACTER, + QUOTED_CHARACTER); try { jdbc.connect(); } catch (Exception e) { diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/dispatcher/EventDispatcherImpl.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/dispatcher/EventDispatcherImpl.java index 9c62879ac52..b6c3c9755a1 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/dispatcher/EventDispatcherImpl.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/dispatcher/EventDispatcherImpl.java @@ -22,6 +22,7 @@ import io.debezium.document.DocumentWriter; import io.debezium.pipeline.DataChangeEvent; import io.debezium.pipeline.EventDispatcher; +import io.debezium.pipeline.source.snapshot.incremental.IncrementalSnapshotChangeEventSource; import io.debezium.pipeline.source.spi.EventMetadataProvider; import io.debezium.pipeline.spi.ChangeEventCreator; import io.debezium.pipeline.spi.SchemaChangeEventEmitter; @@ -127,6 +128,26 @@ public ChangeEventQueue getQueue() { return queue; } + @Override + public void dispatchSchemaChangeEvent( + MySqlPartition partition, + T dataCollectionId, + SchemaChangeEventEmitter schemaChangeEventEmitter) + throws InterruptedException { + if (dataCollectionId != null && !filter.isIncluded(dataCollectionId)) { + if (historizedSchema == null || historizedSchema.storeOnlyCapturedTables()) { + LOG.trace("Filtering schema change event for {}", dataCollectionId); + return; + } + } + schemaChangeEventEmitter.emitSchemaChangeEvent(new SchemaChangeEventReceiver()); + IncrementalSnapshotChangeEventSource incrementalEventSource = + getIncrementalSnapshotChangeEventSource(); + if (incrementalEventSource != null) { + incrementalEventSource.processSchemaChange(partition, dataCollectionId); + } + } + @Override public void dispatchSchemaChangeEvent( Collection dataCollectionIds, SchemaChangeEventEmitter schemaChangeEventEmitter) diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReader.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReader.java index b1594719057..9eb09e238b2 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReader.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReader.java @@ -200,6 +200,7 @@ public void close() { READER_CLOSE_TIMEOUT); } } + statefulTaskContext.getDatabaseSchema().close(); } catch (Exception e) { LOG.error("Close binlog reader error", e); } diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReader.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReader.java index d59c3b3db3f..96643896d5e 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReader.java +++ 
b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReader.java @@ -341,6 +341,9 @@ public void close() { if (statefulTaskContext.getBinaryLogClient() != null) { statefulTaskContext.getBinaryLogClient().disconnect(); } + if (statefulTaskContext.getDatabaseSchema() != null) { + statefulTaskContext.getDatabaseSchema().close(); + } if (executorService != null) { executorService.shutdown(); if (!executorService.awaitTermination(READER_CLOSE_TIMEOUT, TimeUnit.SECONDS)) { diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/task/context/StatefulTaskContext.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/task/context/StatefulTaskContext.java index 4f43c13beb5..061767f55d7 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/task/context/StatefulTaskContext.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/debezium/task/context/StatefulTaskContext.java @@ -57,6 +57,7 @@ import java.time.Instant; import java.util.List; import java.util.Map; +import java.util.Optional; import static com.ververica.cdc.connectors.mysql.source.offset.BinlogOffset.BINLOG_FILENAME_OFFSET_KEY; import static com.ververica.cdc.connectors.mysql.source.offset.BinlogOffsetUtils.initializeEffectiveOffset; @@ -85,10 +86,10 @@ public class StatefulTaskContext { private MySqlOffsetContext offsetContext; private MySqlPartition mySqlPartition; private TopicSelector topicSelector; - private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; - private StreamingChangeEventSourceMetrics streamingChangeEventSourceMetrics; + private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; + private StreamingChangeEventSourceMetrics streamingChangeEventSourceMetrics; private EventDispatcherImpl dispatcher; - private EventDispatcher.SnapshotReceiver snapshotReceiver; + private EventDispatcher.SnapshotReceiver snapshotReceiver; private SignalEventDispatcher signalEventDispatcher; private ChangeEventQueue queue; private ErrorHandler errorHandler; @@ -114,11 +115,12 @@ public void configure(MySqlSplit mySqlSplit) { .getDbzConfiguration() .getString(EmbeddedFlinkDatabaseHistory.DATABASE_HISTORY_INSTANCE_NAME), mySqlSplit.getTableSchemas().values()); + + Optional.ofNullable(databaseSchema).ifPresent(MySqlDatabaseSchema::close); this.databaseSchema = DebeziumUtils.createMySqlDatabaseSchema(connectorConfig, tableIdCaseInsensitive); - this.mySqlPartition = - new MySqlPartition.Provider(connectorConfig).getPartitions().iterator().next(); + this.mySqlPartition = new MySqlPartition(connectorConfig.getLogicalName()); this.offsetContext = loadStartingOffsetState(new MySqlOffsetContext.Loader(connectorConfig), mySqlSplit); @@ -171,8 +173,8 @@ public void configure(MySqlSplit mySqlSplit) { this.streamingChangeEventSourceMetrics = changeEventSourceMetricsFactory.getStreamingMetrics( taskContext, queue, metadataProvider); - this.errorHandler = new MySqlErrorHandler( - connectorConfig, queue, taskContext, sourceConfig); + this.errorHandler = + new MySqlErrorHandler(connectorConfig, queue, taskContext, sourceConfig); } private void validateAndLoadDatabaseHistory( @@ -365,7 +367,7 @@ public EventDispatcherImpl getDispatcher() { return dispatcher; } - public EventDispatcher.SnapshotReceiver getSnapshotReceiver() { + public EventDispatcher.SnapshotReceiver getSnapshotReceiver() { return snapshotReceiver; } @@ -393,11 
+395,12 @@ public TopicSelector getTopicSelector() { return topicSelector; } - public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { + public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { return snapshotChangeEventSourceMetrics; } - public StreamingChangeEventSourceMetrics getStreamingChangeEventSourceMetrics() { + public StreamingChangeEventSourceMetrics + getStreamingChangeEventSourceMetrics() { return streamingChangeEventSourceMetrics; } diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/schema/MySqlSchema.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/schema/MySqlSchema.java index d0daa6b4dfb..ce355262d16 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/schema/MySqlSchema.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/schema/MySqlSchema.java @@ -40,7 +40,7 @@ import static com.ververica.cdc.connectors.mysql.source.utils.StatementUtils.quote; /** A component used to get schema by table path. */ -public class MySqlSchema { +public class MySqlSchema implements AutoCloseable { private static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE "; private static final String DESC_TABLE = "DESC "; @@ -170,4 +170,9 @@ private void buildSchemaByDescTable( e); } } + + @Override + public void close() { + databaseSchema.close(); + } } diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/MySqlSource.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/MySqlSource.java index 8b00598074b..8f33a717a5c 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/MySqlSource.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/MySqlSource.java @@ -96,6 +96,8 @@ public class MySqlSource private static final long serialVersionUID = 1L; + private static final String ENUMERATOR_SERVER_NAME = "mysql_source_split_enumerator"; + private final MySqlSourceConfigFactory configFactory; private final DebeziumDeserializationSchema deserializationSchema; @@ -164,7 +166,7 @@ public SourceReader createReader(SourceReaderContext readerContex @Override public SplitEnumerator createEnumerator( SplitEnumeratorContext enumContext) { - MySqlSourceConfig sourceConfig = configFactory.createConfig(0); + MySqlSourceConfig sourceConfig = configFactory.createConfig(0, ENUMERATOR_SERVER_NAME); final MySqlValidator validator = new MySqlValidator(sourceConfig); validator.validate(); @@ -193,7 +195,8 @@ public SplitEnumerator createEnumerator( @Override public SplitEnumerator restoreEnumerator( SplitEnumeratorContext enumContext, PendingSplitsState checkpoint) { - MySqlSourceConfig sourceConfig = configFactory.createConfig(0); + + MySqlSourceConfig sourceConfig = configFactory.createConfig(0, ENUMERATOR_SERVER_NAME); final MySqlSplitAssigner splitAssigner; if (checkpoint instanceof HybridPendingSplitsState) { diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/ChunkSplitter.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/ChunkSplitter.java index acba94c8a8d..31e7629c21c 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/ChunkSplitter.java +++ 
b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/ChunkSplitter.java @@ -20,6 +20,7 @@ import com.ververica.cdc.connectors.mysql.source.assigners.state.ChunkSplitterState; import com.ververica.cdc.connectors.mysql.source.split.MySqlSnapshotSplit; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.relational.TableId; import java.util.List; @@ -38,7 +39,8 @@ public interface ChunkSplitter { * Called to split chunks for a table, the assigner could invoke this method multiple times to * receive all the splits. */ - List splitChunks(TableId tableId) throws Exception; + List splitChunks(MySqlPartition partition, TableId tableId) + throws Exception; /** Get whether the splitter has more chunks for current table. */ boolean hasNextChunk(); diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java index cdad275eb1a..b5a89922d0a 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlChunkSplitter.java @@ -29,6 +29,7 @@ import com.ververica.cdc.connectors.mysql.source.split.MySqlSnapshotSplit; import com.ververica.cdc.connectors.mysql.source.utils.ChunkUtils; import com.ververica.cdc.connectors.mysql.source.utils.ObjectUtils; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.Column; import io.debezium.relational.Table; @@ -112,11 +113,12 @@ public void open() { } @Override - public List splitChunks(TableId tableId) throws Exception { + public List splitChunks(MySqlPartition partition, TableId tableId) + throws Exception { if (!hasNextChunk()) { - analyzeTable(tableId); + analyzeTable(partition, tableId); Optional> evenlySplitChunks = - trySplitAllEvenlySizedChunks(tableId); + trySplitAllEvenlySizedChunks(partition, tableId); if (evenlySplitChunks.isPresent()) { return evenlySplitChunks.get(); } else { @@ -124,7 +126,8 @@ public List splitChunks(TableId tableId) throws Exception { this.currentSplittingTableId = tableId; this.nextChunkStart = ChunkSplitterState.ChunkBound.START_BOUND; this.nextChunkId = 0; - return Collections.singletonList(splitOneUnevenlySizedChunk(tableId)); + return Collections.singletonList( + splitOneUnevenlySizedChunk(partition, tableId)); } } } else { @@ -132,18 +135,19 @@ public List splitChunks(TableId tableId) throws Exception { currentSplittingTableId.equals(tableId), "Can not split a new table before the previous table splitting finish."); if (currentSplittingTable == null) { - analyzeTable(currentSplittingTableId); + analyzeTable(partition, currentSplittingTableId); } synchronized (lock) { - return Collections.singletonList(splitOneUnevenlySizedChunk(tableId)); + return Collections.singletonList(splitOneUnevenlySizedChunk(partition, tableId)); } } } /** Analyze the meta information for given table. 
*/ - private void analyzeTable(TableId tableId) { + private void analyzeTable(MySqlPartition partition, TableId tableId) { try { - currentSplittingTable = mySqlSchema.getTableSchema(jdbcConnection, tableId).getTable(); + currentSplittingTable = + mySqlSchema.getTableSchema(partition, jdbcConnection, tableId).getTable(); splitColumn = ChunkUtils.getChunkKeyColumn( currentSplittingTable, sourceConfig.getChunkKeyColumn()); @@ -156,7 +160,8 @@ private void analyzeTable(TableId tableId) { } /** Generates one snapshot split (chunk) for the give table path. */ - private MySqlSnapshotSplit splitOneUnevenlySizedChunk(TableId tableId) throws SQLException { + private MySqlSnapshotSplit splitOneUnevenlySizedChunk(MySqlPartition partition, TableId tableId) + throws SQLException { final int chunkSize = sourceConfig.getSplitSize(); final Object chunkStartVal = nextChunkStart.getValue(); LOG.info( @@ -182,12 +187,24 @@ private MySqlSnapshotSplit splitOneUnevenlySizedChunk(TableId tableId) throws SQ if (chunkEnd != null && ObjectUtils.compare(chunkEnd, minMaxOfSplitColumn[1]) <= 0) { nextChunkStart = ChunkSplitterState.ChunkBound.middleOf(chunkEnd); return createSnapshotSplit( - jdbcConnection, tableId, nextChunkId++, splitType, chunkStartVal, chunkEnd); + jdbcConnection, + partition, + tableId, + nextChunkId++, + splitType, + chunkStartVal, + chunkEnd); } else { currentSplittingTableId = null; nextChunkStart = ChunkSplitterState.ChunkBound.END_BOUND; return createSnapshotSplit( - jdbcConnection, tableId, nextChunkId++, splitType, chunkStartVal, null); + jdbcConnection, + partition, + tableId, + nextChunkId++, + splitType, + chunkStartVal, + null); } } @@ -198,14 +215,16 @@ private MySqlSnapshotSplit splitOneUnevenlySizedChunk(TableId tableId) throws SQ * using evenly-sized chunks which is much efficient, using unevenly-sized chunks which will * request many queries and is not efficient. */ - private Optional> trySplitAllEvenlySizedChunks(TableId tableId) { + private Optional> trySplitAllEvenlySizedChunks( + MySqlPartition partition, TableId tableId) { LOG.debug("Try evenly splitting table {} into chunks", tableId); final Object min = minMaxOfSplitColumn[0]; final Object max = minMaxOfSplitColumn[1]; if (min == null || max == null || min.equals(max)) { // empty table, or only one row, return full table scan as a chunk return Optional.of( - generateSplits(tableId, Collections.singletonList(ChunkRange.all()))); + generateSplits( + partition, tableId, Collections.singletonList(ChunkRange.all()))); } final int chunkSize = sourceConfig.getSplitSize(); @@ -216,7 +235,7 @@ private Optional> trySplitAllEvenlySizedChunks(TableId List chunks = splitEvenlySizedChunks( tableId, min, max, approximateRowCnt, chunkSize, dynamicChunkSize); - return Optional.of(generateSplits(tableId, chunks)); + return Optional.of(generateSplits(partition, tableId, chunks)); } else { LOG.debug("beginning unevenly splitting table {} into chunks", tableId); return Optional.empty(); @@ -224,7 +243,8 @@ private Optional> trySplitAllEvenlySizedChunks(TableId } /** Generates all snapshot splits (chunks) from chunk ranges. 
*/ - private List generateSplits(TableId tableId, List chunks) { + private List generateSplits( + MySqlPartition partition, TableId tableId, List chunks) { // convert chunks into splits List splits = new ArrayList<>(); for (int i = 0; i < chunks.size(); i++) { @@ -232,6 +252,7 @@ private List generateSplits(TableId tableId, List schema = new HashMap<>(); - schema.put(tableId, mySqlSchema.getTableSchema(jdbc, tableId)); + schema.put(tableId, mySqlSchema.getTableSchema(partition, jdbc, tableId)); return new MySqlSnapshotSplit( tableId, splitId(tableId, chunkId), @@ -450,5 +472,6 @@ public void close() throws Exception { if (jdbcConnection != null) { jdbcConnection.close(); } + mySqlSchema.close(); } } diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java index 021bb55698c..33b9db27e2a 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/assigners/MySqlSnapshotSplitAssigner.java @@ -80,6 +80,8 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner { private final int currentParallelism; private final List remainingTables; private final boolean isRemainingTablesCheckpointed; + + private final MySqlPartition partition; private final Object lock = new Object(); private volatile Throwable uncaughtSplitterException; @@ -155,6 +157,8 @@ private MySqlSnapshotSplitAssigner( this.isTableIdCaseSensitive = isTableIdCaseSensitive; this.chunkSplitter = createChunkSplitter(sourceConfig, isTableIdCaseSensitive, chunkSplitterState); + this.partition = + new MySqlPartition(sourceConfig.getMySqlConnectorConfig().getLogicalName()); } @Override @@ -243,7 +247,7 @@ private void splitTable(TableId nextTable) { synchronized (lock) { List splits; try { - splits = chunkSplitter.splitChunks(nextTable); + splits = chunkSplitter.splitChunks(partition, nextTable); } catch (Exception e) { throw new IllegalStateException( "Error when splitting chunks for " + nextTable, e); @@ -508,13 +512,8 @@ private void splitChunksForRemainingTables() { // split the remaining tables for (TableId nextTable : remainingTables) { - splitTable( - new MySqlPartition.Provider(sourceConfig.getMySqlConnectorConfig()) - .getPartitions() - .iterator() - .next(), - nextTable); - } + splitTable(nextTable); + } } catch (Throwable e) { synchronized (lock) { if (uncaughtSplitterException == null) { diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java index 5f0af864a55..c1c8e53b5cc 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/config/MySqlSourceConfigFactory.java @@ -250,14 +250,19 @@ public MySqlSourceConfigFactory debeziumProperties(Properties properties) { /** Creates a new {@link MySqlSourceConfig} for the given subtask {@code subtaskId}. 
*/ public MySqlSourceConfig createConfig(int subtaskId) { - Properties props = new Properties(); // hard code server name, because we don't need to distinguish it, docs: // Logical name that identifies and provides a namespace for the particular // MySQL database server/cluster being monitored. The logical name should be // unique across all other connectors, since it is used as a prefix for all // Kafka topic names emanating from this connector. // Only alphanumeric characters and underscores should be used. - props.setProperty("database.server.name", "mysql_binlog_source"); + return createConfig(subtaskId, "mysql_binlog_source"); + } + + /** Creates a new {@link MySqlSourceConfig} for the given subtask {@code subtaskId}. */ + public MySqlSourceConfig createConfig(int subtaskId, String serverName) { + Properties props = new Properties(); + props.setProperty("database.server.name", serverName); props.setProperty("database.hostname", checkNotNull(hostname)); props.setProperty("database.user", checkNotNull(username)); props.setProperty("database.password", checkNotNull(password)); diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReader.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReader.java index ac904d419e1..2a0560cbe29 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReader.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReader.java @@ -82,6 +82,7 @@ public class MySqlSourceReader private final Map uncompletedBinlogSplits; private final int subtaskId; private final MySqlSourceReaderContext mySqlSourceReaderContext; + private final MySqlPartition partition; private volatile MySqlBinlogSplit suspendedBinlogSplit; public MySqlSourceReader( @@ -103,6 +104,8 @@ public MySqlSourceReader( this.subtaskId = context.getSourceReaderContext().getIndexOfSubtask(); this.mySqlSourceReaderContext = context; this.suspendedBinlogSplit = null; + this.partition = + new MySqlPartition(sourceConfig.getMySqlConnectorConfig().getLogicalName()); } @Override @@ -374,14 +377,16 @@ private MySqlBinlogSplit discoverTableSchemasForBinlogSplit( Map tableSchemas; if (split.getTableSchemas().isEmpty()) { tableSchemas = - TableDiscoveryUtils.discoverSchemaForCapturedTables(sourceConfig, jdbc); + TableDiscoveryUtils.discoverSchemaForCapturedTables( + partition, sourceConfig, jdbc); LOG.info( "The table schema discovery for binlog split {} success", split.splitId()); } else { List existedTables = new ArrayList<>(split.getTableSchemas().keySet()); tableSchemas = - discoverSchemaForNewAddedTables(existedTables, sourceConfig, jdbc); + discoverSchemaForNewAddedTables( + partition, existedTables, sourceConfig, jdbc); LOG.info( "The table schema discovery for new added tables of binlog split {} success", split.splitId()); diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/StatementUtils.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/StatementUtils.java index 8caeb108072..facfb820f29 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/StatementUtils.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/StatementUtils.java @@ -61,7 +61,7 @@ public static long 
queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) // accurate than COUNT(*), but is more efficient for large table. final String useDatabaseStatement = String.format("USE %s;", quote(tableId.catalog())); final String rowCountQuery = String.format("SHOW TABLE STATUS LIKE '%s';", tableId.table()); - jdbc.executeWithoutCommitting(useDatabaseStatement); + jdbc.execute(useDatabaseStatement); return jdbc.queryAndMap( rowCountQuery, rs -> { diff --git a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/TableDiscoveryUtils.java b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/TableDiscoveryUtils.java index 1d74744ff10..32b33755b8d 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/TableDiscoveryUtils.java +++ b/flink-connector-mysql-cdc/src/main/java/com/ververica/cdc/connectors/mysql/source/utils/TableDiscoveryUtils.java @@ -104,11 +104,14 @@ public static Map discoverSchemaForCapturedTables( } catch (SQLException e) { throw new FlinkRuntimeException("Failed to discover captured tables", e); } - return discoverSchemaForCapturedTables(capturedTableIds, sourceConfig, jdbc); + return discoverSchemaForCapturedTables(partition, capturedTableIds, sourceConfig, jdbc); } public static Map discoverSchemaForNewAddedTables( - List existedTables, MySqlSourceConfig sourceConfig, MySqlConnection jdbc) { + MySqlPartition partition, + List existedTables, + MySqlSourceConfig sourceConfig, + MySqlConnection jdbc) { final List capturedTableIds; try { capturedTableIds = @@ -120,25 +123,30 @@ public static Map discoverSchemaForNewAddedTables( } return capturedTableIds.isEmpty() ? new HashMap<>() - : discoverSchemaForCapturedTables(capturedTableIds, sourceConfig, jdbc); + : discoverSchemaForCapturedTables(partition, capturedTableIds, sourceConfig, jdbc); } public static Map discoverSchemaForCapturedTables( - List capturedTableIds, MySqlSourceConfig sourceConfig, MySqlConnection jdbc) { + MySqlPartition partition, + List capturedTableIds, + MySqlSourceConfig sourceConfig, + MySqlConnection jdbc) { if (capturedTableIds.isEmpty()) { throw new IllegalArgumentException( String.format( "Can't find any matched tables, please check your configured database-name: %s and table-name: %s", sourceConfig.getDatabaseList(), sourceConfig.getTableList())); } + // fetch table schemas - MySqlSchema mySqlSchema = new MySqlSchema(sourceConfig, jdbc.isTableIdCaseSensitive()); - Map tableSchemas = new HashMap<>(); - for (TableId tableId : capturedTableIds) { - TableChange tableSchema = - mySqlSchema.getTableSchema(partition, jdbc, tableId); - tableSchemas.put(tableId, tableSchema); + try (MySqlSchema mySqlSchema = + new MySqlSchema(sourceConfig, jdbc.isTableIdCaseSensitive())) { + Map tableSchemas = new HashMap<>(); + for (TableId tableId : capturedTableIds) { + TableChange tableSchema = mySqlSchema.getTableSchema(partition, jdbc, tableId); + tableSchemas.put(tableId, tableSchema); + } + return tableSchemas; } - return tableSchemas; } } diff --git a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlConnection.java b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlConnection.java index 1d68becd416..faa4997a87d 100644 --- a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlConnection.java +++ b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlConnection.java @@ -6,6 +6,7 @@ package io.debezium.connector.mysql; 
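The hunks above consistently thread a Debezium MySqlPartition through schema discovery and chunk splitting, and MySqlSchema becomes AutoCloseable so its underlying database schema is released after use. A minimal sketch of how these pieces fit together for a single table lookup, assuming an already-built MySqlSourceConfig and an open MySqlConnection; the class and method names in the sketch are illustrative and not part of the patch:

import com.ververica.cdc.connectors.mysql.schema.MySqlSchema;
import com.ververica.cdc.connectors.mysql.source.config.MySqlSourceConfig;
import io.debezium.connector.mysql.MySqlConnection;
import io.debezium.connector.mysql.MySqlPartition;
import io.debezium.relational.TableId;
import io.debezium.relational.history.TableChanges.TableChange;

class PartitionAwareSchemaLookup {
    TableChange lookup(MySqlSourceConfig sourceConfig, MySqlConnection jdbc, TableId tableId)
            throws Exception {
        // Derive the partition from the connector's logical name, mirroring the
        // new MySqlPartition(connectorConfig.getLogicalName()) calls introduced above.
        MySqlPartition partition =
                new MySqlPartition(sourceConfig.getMySqlConnectorConfig().getLogicalName());
        // MySqlSchema implements AutoCloseable after this patch, so try-with-resources
        // closes the wrapped database schema once the table schema has been read.
        try (MySqlSchema mySqlSchema =
                new MySqlSchema(sourceConfig, jdbc.isTableIdCaseSensitive())) {
            return mySqlSchema.getTableSchema(partition, jdbc, tableId);
        }
    }
}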
+import com.mysql.cj.CharsetMapping; import io.debezium.DebeziumException; import io.debezium.config.CommonConnectorConfig; import io.debezium.config.CommonConnectorConfig.EventProcessingFailureHandlingMode; @@ -14,6 +15,7 @@ import io.debezium.config.Field; import io.debezium.connector.mysql.MySqlConnectorConfig.SecureConnectionMode; import io.debezium.connector.mysql.legacy.MySqlJdbcContext.DatabaseLocales; +import io.debezium.jdbc.JdbcConfiguration; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.Column; import io.debezium.relational.Table; @@ -24,7 +26,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.time.Duration; @@ -37,23 +38,23 @@ import java.util.Properties; /** - * Copied from Debezium project(1.6.4.final) to add custom jdbc properties in the jdbc url. The new + * Copied from Debezium project(1.9.7.final) to add custom jdbc properties in the jdbc url. The new * parameter {@code jdbcProperties} in the constructor of {@link MySqlConnectionConfiguration} will * be used to generate the jdbc url pattern, and may overwrite the default value. * - *
Line 71: Add field {@code urlPattern} in {@link MySqlConnection} and remove old pattern. + * Line 75: Add field {@code urlPattern} in {@link MySqlConnection} and remove old pattern. * - * Line 84: Init {@code urlPattern} using the url pattern from {@link + * Line 92: Init {@code urlPattern} using the url pattern from {@link * MySqlConnectionConfiguration}. * - * Line 552: Generate the connection string by the new field {@code urlPattern}. + * Line 544: Generate the connection string by the new field {@code urlPattern}. * - * Line 566 ~ 577: Add new constant and field {@code urlPattern} to {@link + * Line 569 ~ 574: Add new constant and field {@code urlPattern} to {@link * MySqlConnectionConfiguration}. * - * Line 622 ~ 625: Init new field {@code urlPattern} in {@link MySqlConnectionConfiguration}. + * Line 625: Init new field {@code urlPattern} in {@link MySqlConnectionConfiguration}. * - * Line 686 ~ 716: Add some methods helping to generate the url pattern and add default values. + *
Line 715 ~ 741: Add some methods helping to generate the url pattern and add default values. */ public class MySqlConnection extends JdbcConnection { @@ -64,10 +65,12 @@ public class MySqlConnection extends JdbcConnection { "SHOW VARIABLES WHERE Variable_name IN ('character_set_server','collation_server')"; private static final String SQL_SHOW_SESSION_VARIABLE_SSL_VERSION = "SHOW SESSION STATUS LIKE 'Ssl_version'"; + private static final String QUOTED_CHARACTER = "`"; private final Map originalSystemProperties = new HashMap<>(); private final MySqlConnectionConfiguration connectionConfig; - private final MysqlFieldReader mysqlFieldReader; + private final MySqlFieldReader mysqlFieldReader; + private final String urlPattern; /** @@ -77,8 +80,12 @@ public class MySqlConnection extends JdbcConnection { * @param fieldReader binary or text protocol based readers */ public MySqlConnection( - MySqlConnectionConfiguration connectionConfig, MysqlFieldReader fieldReader) { - super(connectionConfig.config(), connectionConfig.factory()); + MySqlConnectionConfiguration connectionConfig, MySqlFieldReader fieldReader) { + super( + connectionConfig.jdbcConfig, + connectionConfig.factory(), + QUOTED_CHARACTER, + QUOTED_CHARACTER); this.connectionConfig = connectionConfig; this.mysqlFieldReader = fieldReader; this.urlPattern = connectionConfig.getUrlPattern(); @@ -90,27 +97,7 @@ public MySqlConnection( * @param connectionConfig {@link MySqlConnectionConfiguration} instance, may not be null. */ public MySqlConnection(MySqlConnectionConfiguration connectionConfig) { - this(connectionConfig, new MysqlTextProtocolFieldReader()); - } - - @Override - public synchronized Connection connection(boolean executeOnConnect) throws SQLException { - if (!isConnected() && connectionConfig.sslModeEnabled()) { - originalSystemProperties.clear(); - // Set the System properties for SSL for the MySQL driver ... - setSystemProperty("javax.net.ssl.keyStore", MySqlConnectorConfig.SSL_KEYSTORE, true); - setSystemProperty( - "javax.net.ssl.keyStorePassword", - MySqlConnectorConfig.SSL_KEYSTORE_PASSWORD, - false); - setSystemProperty( - "javax.net.ssl.trustStore", MySqlConnectorConfig.SSL_TRUSTSTORE, true); - setSystemProperty( - "javax.net.ssl.trustStorePassword", - MySqlConnectorConfig.SSL_TRUSTSTORE_PASSWORD, - false); - } - return super.connection(executeOnConnect); + this(connectionConfig, new MySqlTextProtocolFieldReader(null)); } @Override @@ -202,7 +189,7 @@ protected String setStatementFor(Map variables) { } protected void setSystemProperty(String property, Field field, boolean showValueInError) { - String value = connectionConfig.config().getString(field); + String value = connectionConfig.originalConfig().getString(field); if (value != null) { value = value.trim(); String existingValue = System.getProperty(property); @@ -548,10 +535,25 @@ protected Map readDatabaseCollations() { } } + public MySqlConnectionConfiguration connectionConfig() { + return connectionConfig; + } + public String connectionString() { return connectionString(urlPattern); } + public static String getJavaEncodingForMysqlCharSet(String mysqlCharsetName) { + return CharsetMappingWrapper.getJavaEncodingForMysqlCharSet(mysqlCharsetName); + } + + /** Helper to gain access to protected method. 
*/ + private static final class CharsetMappingWrapper extends CharsetMapping { + static String getJavaEncodingForMysqlCharSet(String mySqlCharsetName) { + return CharsetMapping.getStaticJavaEncodingForMysqlCharset(mySqlCharsetName); + } + } + /** Connection configuration to create a {@link MySqlConnection}. */ public static class MySqlConnectionConfiguration { @@ -559,7 +561,7 @@ public static class MySqlConnectionConfiguration { protected static final String JDBC_PROPERTY_CONNECTION_TIME_ZONE = "connectionTimeZone"; protected static final String JDBC_PROPERTY_LEGACY_SERVER_TIME_ZONE = "serverTimezone"; - private final Configuration jdbcConfig; + private final JdbcConfiguration jdbcConfig; private final ConnectionFactory factory; private final Configuration config; @@ -617,7 +619,7 @@ public MySqlConnectionConfiguration(Configuration config, Properties jdbcPropert jdbcConfigBuilder.with( JDBC_PROPERTY_CONNECTION_TIME_ZONE, determineConnectionTimeZone(dbConfig)); - this.jdbcConfig = jdbcConfigBuilder.build(); + this.jdbcConfig = JdbcConfiguration.adapt(jdbcConfigBuilder.build()); String driverClassName = this.jdbcConfig.getString(MySqlConnectorConfig.JDBC_DRIVER); this.urlPattern = formatJdbcUrl(jdbcProperties); factory = @@ -646,10 +648,14 @@ private static String determineConnectionTimeZone(final Configuration dbConfig) return connectionTimeZone != null ? connectionTimeZone : "SERVER"; } - public Configuration config() { + public JdbcConfiguration config() { return jdbcConfig; } + public Configuration originalConfig() { + return config; + } + public ConnectionFactory factory() { return factory; } @@ -679,6 +685,24 @@ public boolean sslModeEnabled() { return sslMode() != SecureConnectionMode.DISABLED; } + public String sslKeyStore() { + return config.getString(MySqlConnectorConfig.SSL_KEYSTORE); + } + + public char[] sslKeyStorePassword() { + String password = config.getString(MySqlConnectorConfig.SSL_KEYSTORE_PASSWORD); + return Strings.isNullOrBlank(password) ? null : password.toCharArray(); + } + + public String sslTrustStore() { + return config.getString(MySqlConnectorConfig.SSL_TRUSTSTORE); + } + + public char[] sslTrustStorePassword() { + String password = config.getString(MySqlConnectorConfig.SSL_TRUSTSTORE_PASSWORD); + return Strings.isNullOrBlank(password) ? null : password.toCharArray(); + } + public Duration getConnectionTimeout() { return Duration.ofMillis(config.getLong(MySqlConnectorConfig.CONNECTION_TIMEOUT_MS)); } diff --git a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlDefaultValueConverter.java b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlDefaultValueConverter.java deleted file mode 100644 index 671782d5d26..00000000000 --- a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlDefaultValueConverter.java +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright Debezium Authors. 
- * - * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 - */ - -package io.debezium.connector.mysql; - -import io.debezium.annotation.Immutable; -import io.debezium.relational.Column; -import io.debezium.relational.ColumnEditor; -import io.debezium.relational.ValueConverter; -import org.apache.kafka.connect.data.Field; -import org.apache.kafka.connect.data.Schema; -import org.apache.kafka.connect.data.SchemaBuilder; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.Timestamp; -import java.sql.Types; -import java.time.Duration; -import java.time.Instant; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeFormatterBuilder; -import java.time.temporal.ChronoField; -import java.util.regex.Pattern; - -/** - * Copied from Debezium project(v1.6.4.Final) to fix error when parsing the string default value for - * numeric types. This class should be deleted after https://issues.redhat.com/browse/DBZ-4150 - * included. - * - *
Line 87, Line 114~129: trim the default string value when the type is a numeric type. - */ -@Immutable -public class MySqlDefaultValueConverter { - - private static final Pattern EPOCH_EQUIVALENT_TIMESTAMP = - Pattern.compile( - "(\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}) (00:00:00(\\.\\d{1,6})?)"); - - private static final Pattern EPOCH_EQUIVALENT_DATE = - Pattern.compile("\\d{4}-\\d{2}-00|\\d{4}-00-\\d{2}|0000-\\d{2}-\\d{2}"); - - private static final String EPOCH_TIMESTAMP = "1970-01-01 00:00:00"; - - private static final String EPOCH_DATE = "1970-01-01"; - - private static final DateTimeFormatter ISO_LOCAL_DATE_WITH_OPTIONAL_TIME = - new DateTimeFormatterBuilder() - .append(DateTimeFormatter.ISO_LOCAL_DATE) - .optionalStart() - .appendLiteral(" ") - .append(DateTimeFormatter.ISO_LOCAL_TIME) - .optionalEnd() - .toFormatter(); - - private final MySqlValueConverters converters; - - public MySqlDefaultValueConverter(MySqlValueConverters converters) { - this.converters = converters; - } - - /** - * Converts a default value from the expected format to a logical object acceptable by the main - * JDBC converter. - * - * @param column column definition - * @param value string formatted default value - * @return value converted to a Java type - */ - public Object convert(Column column, String value) { - if (value == null) { - return value; - } - - // boolean is also INT(1) or TINYINT(1) - if ("TINYINT".equals(column.typeName()) || "INT".equals(column.typeName())) { - if ("true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value)) { - return convertToBoolean(value); - } - } - - value = trimValueIfNeed(column, value); - switch (column.jdbcType()) { - case Types.DATE: - return convertToLocalDate(column, value); - case Types.TIMESTAMP: - return convertToLocalDateTime(column, value); - case Types.TIMESTAMP_WITH_TIMEZONE: - return convertToTimestamp(column, value); - case Types.TIME: - return convertToDuration(column, value); - case Types.BOOLEAN: - return convertToBoolean(value); - case Types.BIT: - return convertToBits(column, value); - - case Types.NUMERIC: - case Types.DECIMAL: - return convertToDecimal(column, value); - - case Types.FLOAT: - case Types.DOUBLE: - case Types.REAL: - return convertToDouble(value); - } - return value; - } - - private String trimValueIfNeed(Column column, String value) { - int jdbcType = column.jdbcType(); - if (jdbcType == Types.DECIMAL - || jdbcType == Types.INTEGER - || jdbcType == Types.TINYINT - || jdbcType == Types.BIGINT - || jdbcType == Types.SMALLINT - || jdbcType == Types.DOUBLE - || jdbcType == Types.FLOAT - || jdbcType == Types.BOOLEAN - || jdbcType == Types.NUMERIC - || jdbcType == Types.REAL) { - return value.trim(); - } - return value; - } - - /** - * Converts a string object for an object type of {@link LocalDate} or {@link LocalDateTime} in - * case of MySql Date type. 
If the column definition allows null and default value is - * 0000-00-00, we need return null; else 0000-00-00 will be replaced with 1970-01-01; - * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link LocalDate} type or {@link - * LocalDateTime} in case of MySql Date type; - * @return the converted value; - */ - private Object convertToLocalDate(Column column, String value) { - final boolean zero = - EPOCH_EQUIVALENT_DATE.matcher(value).matches() - || EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() - || "0".equals(value); - - if (zero && column.isOptional()) { - return null; - } - if (zero) { - value = EPOCH_DATE; - } - return LocalDate.from(ISO_LOCAL_DATE_WITH_OPTIONAL_TIME.parse(value)); - } - - /** - * Converts a string object for an object type of {@link LocalDateTime}. If the column - * definition allows null and default value is 0000-00-00 00:00:00, we need return null, else - * 0000-00-00 00:00:00 will be replaced with 1970-01-01 00:00:00; - * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link LocalDateTime} type; - * @return the converted value; - */ - private Object convertToLocalDateTime(Column column, String value) { - final boolean matches = - EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() || "0".equals(value); - if (matches) { - if (column.isOptional()) { - return null; - } - - value = EPOCH_TIMESTAMP; - } - - return LocalDateTime.from(timestampFormat(column.length()).parse(value)); - } - - /** - * Converts a string object for an object type of {@link Timestamp}. If the column definition - * allows null and default value is 0000-00-00 00:00:00, we need return null, else 0000-00-00 - * 00:00:00 will be replaced with 1970-01-01 00:00:00; - * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link Timestamp} type; - * @return the converted value; - */ - private Object convertToTimestamp(Column column, String value) { - final boolean matches = - EPOCH_EQUIVALENT_TIMESTAMP.matcher(value).matches() - || "0".equals(value) - || EPOCH_TIMESTAMP.equals(value); - if (matches) { - if (column.isOptional()) { - return null; - } - - return Timestamp.from(Instant.EPOCH); - } - value = cleanTimestamp(value); - return Timestamp.valueOf(value).toInstant().atZone(ZoneId.systemDefault()); - } - - /** - * Converts a string object for an object type of {@link Duration}. - * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link Duration} type; - * @return the converted value; - */ - private Object convertToDuration(Column column, String value) { - return MySqlValueConverters.stringToDuration(value); - } - - /** - * Converts a string object for an expected JDBC type of {@link Types#DOUBLE}. - * - * @param value the string object to be converted into a {@link Types#DOUBLE} type; - * @return the converted value; - */ - private Object convertToDouble(String value) { - return Double.parseDouble(value); - } - - /** - * Converts a string object for an expected JDBC type of {@link Types#DECIMAL}. 
- * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link Types#DECIMAL} type; - * @return the converted value; - */ - private Object convertToDecimal(Column column, String value) { - return column.scale().isPresent() - ? new BigDecimal(value).setScale(column.scale().get(), RoundingMode.HALF_UP) - : new BigDecimal(value); - } - - /** - * Converts a string object for an expected JDBC type of {@link Types#BIT}. - * - * @param column the column definition describing the {@code data} value; never null - * @param value the string object to be converted into a {@link Types#BIT} type; - * @return the converted value; - */ - private Object convertToBits(Column column, String value) { - if (column.length() > 1) { - return convertToBits(value); - } - return convertToBit(value); - } - - private Object convertToBit(String value) { - try { - return Short.parseShort(value) != 0; - } catch (NumberFormatException ignore) { - return Boolean.parseBoolean(value); - } - } - - private Object convertToBits(String value) { - int nums = value.length() / Byte.SIZE + (value.length() % Byte.SIZE == 0 ? 0 : 1); - byte[] bytes = new byte[nums]; - for (int i = 0; i < nums; i++) { - int s = value.length() - Byte.SIZE < 0 ? 0 : value.length() - Byte.SIZE; - int e = value.length(); - bytes[nums - i - 1] = (byte) Integer.parseInt(value.substring(s, e), 2); - value = value.substring(0, s); - } - return bytes; - } - - /** - * Converts a string object for an expected JDBC type of {@link Types#BOOLEAN}. - * - * @param value the string object to be converted into a {@link Types#BOOLEAN} type; - * @return the converted value; - */ - private Object convertToBoolean(String value) { - try { - return Integer.parseInt(value) != 0; - } catch (NumberFormatException ignore) { - return Boolean.parseBoolean(value); - } - } - - private DateTimeFormatter timestampFormat(int length) { - final DateTimeFormatterBuilder dtf = - new DateTimeFormatterBuilder() - .appendPattern("yyyy-MM-dd") - .optionalStart() - .appendLiteral(" ") - .append(DateTimeFormatter.ISO_LOCAL_TIME) - .optionalEnd() - .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) - .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) - .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0); - if (length > 0) { - dtf.appendFraction(ChronoField.MICRO_OF_SECOND, 0, length, true); - } - return dtf.toFormatter(); - } - - /** - * Clean input timestamp to yyyy-mm-dd hh:mm:ss[.fffffffff] format. 
- * - * @param s input timestamp - * @return cleaned timestamp - */ - private String cleanTimestamp(String s) { - if (s == null) { - throw new java.lang.IllegalArgumentException("null string"); - } - - s = s.trim(); - - // clean first dash - s = replaceFirstNonNumericSubstring(s, 0, '-'); - // clean second dash - s = replaceFirstNonNumericSubstring(s, s.indexOf('-') + 1, '-'); - // clean dividing space - s = replaceFirstNonNumericSubstring(s, s.indexOf('-', s.indexOf('-') + 1) + 1, ' '); - if (s.indexOf(' ') != -1) { - // clean first colon - s = replaceFirstNonNumericSubstring(s, s.indexOf(' ') + 1, ':'); - if (s.indexOf(':') != -1) { - // clean second colon - s = replaceFirstNonNumericSubstring(s, s.indexOf(':') + 1, ':'); - } - } - - final int maxMonth = 12; - final int maxDay = 31; - - // Parse the date - int firstDash = s.indexOf('-'); - int secondDash = s.indexOf('-', firstDash + 1); - int dividingSpace = s.indexOf(' '); - - // Parse the time - int firstColon = s.indexOf(':', dividingSpace + 1); - int secondColon = s.indexOf(':', firstColon + 1); - int period = s.indexOf('.', secondColon + 1); - - int year = 0; - int month = 0; - int day = 0; - int hour = 0; - int minute = 0; - int second = 0; - - // Get the date - int len = s.length(); - boolean parsedDate = false; - if (firstDash > 0 && secondDash > firstDash) { - year = Integer.parseInt(s.substring(0, firstDash)); - month = Integer.parseInt(s.substring(firstDash + 1, secondDash)); - if (dividingSpace != -1) { - day = Integer.parseInt(s.substring(secondDash + 1, dividingSpace)); - } else { - day = Integer.parseInt(s.substring(secondDash + 1, len)); - } - - if ((month >= 1 && month <= maxMonth) && (day >= 1 && day <= maxDay)) { - parsedDate = true; - } - } - if (!parsedDate) { - throw new java.lang.IllegalArgumentException("Cannot parse the date from " + s); - } - - // Get the time. Hour, minute, second and colons are all optional - if (dividingSpace != -1 && dividingSpace < len - 1) { - if (firstColon == -1) { - hour = Integer.parseInt(s.substring(dividingSpace + 1, len)); - } else { - hour = Integer.parseInt(s.substring(dividingSpace + 1, firstColon)); - if (firstColon < len - 1) { - if (secondColon == -1) { - minute = Integer.parseInt(s.substring(firstColon + 1, len)); - } else { - minute = Integer.parseInt(s.substring(firstColon + 1, secondColon)); - if (secondColon < len - 1) { - if (period == -1) { - second = Integer.parseInt(s.substring(secondColon + 1, len)); - } else { - second = Integer.parseInt(s.substring(secondColon + 1, period)); - } - } - } - } - } - } - - StringBuilder cleanedTimestamp = new StringBuilder(); - cleanedTimestamp = - cleanedTimestamp.append( - String.format( - "%04d-%02d-%02d %02d:%02d:%02d", - year, month, day, hour, minute, second)); - - if (period != -1 && period < len - 1) { - cleanedTimestamp = cleanedTimestamp.append(".").append(s.substring(period + 1)); - } - - return cleanedTimestamp.toString(); - } - - /** - * Replace the first non-numeric substring. 
- * - * @param s the original string - * @param startIndex the beginning index, inclusive - * @param c the new character - * @return - */ - private String replaceFirstNonNumericSubstring(String s, int startIndex, char c) { - StringBuilder sb = new StringBuilder(); - sb.append(s.substring(0, startIndex)); - - String rest = s.substring(startIndex); - sb.append(rest.replaceFirst("[^\\d]+", Character.toString(c))); - return sb.toString(); - } - - public ColumnEditor setColumnDefaultValue(ColumnEditor columnEditor) { - final Column column = columnEditor.create(); - - // if converters is not null and the default value is not null, we need to convert default - // value - if (converters != null && columnEditor.defaultValue() != null) { - Object defaultValue = columnEditor.defaultValue(); - final SchemaBuilder schemaBuilder = converters.schemaBuilder(column); - if (schemaBuilder == null) { - return columnEditor; - } - final Schema schema = schemaBuilder.build(); - // In order to get the valueConverter for this column, we have to create a field; - // The index value -1 in the field will never used when converting default value; - // So we can set any number here; - final Field field = new Field(columnEditor.name(), -1, schema); - final ValueConverter valueConverter = - converters.converter(columnEditor.create(), field); - if (defaultValue instanceof String) { - defaultValue = convert(column, (String) defaultValue); - } - defaultValue = valueConverter.convert(defaultValue); - columnEditor.defaultValue(defaultValue); - } - return columnEditor; - } -} diff --git a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlStreamingChangeEventSource.java b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlStreamingChangeEventSource.java deleted file mode 100644 index c43e62dd73b..00000000000 --- a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/MySqlStreamingChangeEventSource.java +++ /dev/null @@ -1,1469 +0,0 @@ -/* - * Copyright Debezium Authors. 
- * - * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 - */ - -package io.debezium.connector.mysql; - -import com.github.shyiko.mysql.binlog.BinaryLogClient; -import com.github.shyiko.mysql.binlog.BinaryLogClient.LifecycleListener; -import com.github.shyiko.mysql.binlog.event.DeleteRowsEventData; -import com.github.shyiko.mysql.binlog.event.Event; -import com.github.shyiko.mysql.binlog.event.EventData; -import com.github.shyiko.mysql.binlog.event.EventHeader; -import com.github.shyiko.mysql.binlog.event.EventHeaderV4; -import com.github.shyiko.mysql.binlog.event.EventType; -import com.github.shyiko.mysql.binlog.event.GtidEventData; -import com.github.shyiko.mysql.binlog.event.QueryEventData; -import com.github.shyiko.mysql.binlog.event.RotateEventData; -import com.github.shyiko.mysql.binlog.event.RowsQueryEventData; -import com.github.shyiko.mysql.binlog.event.TableMapEventData; -import com.github.shyiko.mysql.binlog.event.UpdateRowsEventData; -import com.github.shyiko.mysql.binlog.event.WriteRowsEventData; -import com.github.shyiko.mysql.binlog.event.deserialization.EventDataDeserializationException; -import com.github.shyiko.mysql.binlog.event.deserialization.EventDeserializer; -import com.github.shyiko.mysql.binlog.event.deserialization.GtidEventDataDeserializer; -import com.github.shyiko.mysql.binlog.io.ByteArrayInputStream; -import com.github.shyiko.mysql.binlog.network.AuthenticationException; -import com.github.shyiko.mysql.binlog.network.DefaultSSLSocketFactory; -import com.github.shyiko.mysql.binlog.network.SSLMode; -import com.github.shyiko.mysql.binlog.network.SSLSocketFactory; -import com.github.shyiko.mysql.binlog.network.ServerException; -import io.debezium.DebeziumException; -import io.debezium.annotation.SingleThreadAccess; -import io.debezium.config.CommonConnectorConfig.EventProcessingFailureHandlingMode; -import io.debezium.config.Configuration; -import io.debezium.connector.mysql.MySqlConnectorConfig.GtidNewChannelPosition; -import io.debezium.connector.mysql.MySqlConnectorConfig.SecureConnectionMode; -import io.debezium.data.Envelope.Operation; -import io.debezium.function.BlockingConsumer; -import io.debezium.pipeline.ErrorHandler; -import io.debezium.pipeline.EventDispatcher; -import io.debezium.pipeline.source.spi.StreamingChangeEventSource; -import io.debezium.relational.TableId; -import io.debezium.schema.SchemaChangeEvent; -import io.debezium.util.Clock; -import io.debezium.util.Metronome; -import io.debezium.util.Strings; -import io.debezium.util.Threads; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.event.Level; - -import javax.net.ssl.KeyManager; -import javax.net.ssl.KeyManagerFactory; -import javax.net.ssl.SSLContext; -import javax.net.ssl.TrustManager; -import javax.net.ssl.X509TrustManager; - -import java.io.FileInputStream; -import java.io.IOException; -import java.security.GeneralSecurityException; -import java.security.KeyStore; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; -import java.security.UnrecoverableKeyException; -import java.security.cert.CertificateException; -import java.security.cert.X509Certificate; -import java.sql.SQLException; -import java.time.Duration; -import java.time.Instant; -import java.util.EnumMap; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.TimeoutException; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Predicate; - -import static io.debezium.util.Strings.isNullOrEmpty; - -/** - * Copied from Debezium project to fix https://github.com/ververica/flink-cdc-connectors/issues/939. - * - *
Line 268 ~ 270: Clean cache on rotate event to prevent it from growing indefinitely. We should - * remove this class after we bumped a higher debezium version where the - * https://issues.redhat.com/browse/DBZ-5126 has been fixed. - */ -public class MySqlStreamingChangeEventSource - implements StreamingChangeEventSource { - - private static final Logger LOGGER = - LoggerFactory.getLogger(MySqlStreamingChangeEventSource.class); - - private static final String KEEPALIVE_THREAD_NAME = "blc-keepalive"; - - private final EnumMap> eventHandlers = - new EnumMap<>(EventType.class); - private final BinaryLogClient client; - private final MySqlStreamingChangeEventSourceMetrics metrics; - private final Clock clock; - private final EventProcessingFailureHandlingMode eventDeserializationFailureHandlingMode; - private final EventProcessingFailureHandlingMode inconsistentSchemaHandlingMode; - - private int startingRowNumber = 0; - private long initialEventsToSkip = 0L; - private boolean skipEvent = false; - private boolean ignoreDmlEventByGtidSource = false; - private final Predicate gtidDmlSourceFilter; - private final AtomicLong totalRecordCounter = new AtomicLong(); - private volatile Map lastOffset = null; - private com.github.shyiko.mysql.binlog.GtidSet gtidSet; - private final float heartbeatIntervalFactor = 0.8f; - private final Map binaryLogClientThreads = new ConcurrentHashMap<>(4); - private final MySqlTaskContext taskContext; - private final MySqlConnectorConfig connectorConfig; - private final MySqlConnection connection; - private final EventDispatcher eventDispatcher; - private final ErrorHandler errorHandler; - - @SingleThreadAccess("binlog client thread") - private Instant eventTimestamp; - - /** Describe binlog position. */ - public static class BinlogPosition { - final String filename; - final long position; - - public BinlogPosition(String filename, long position) { - assert filename != null; - - this.filename = filename; - this.position = position; - } - - public String getFilename() { - return filename; - } - - public long getPosition() { - return position; - } - - @Override - public String toString() { - return filename + "/" + position; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + filename.hashCode(); - result = prime * result + (int) (position ^ (position >>> 32)); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - BinlogPosition other = (BinlogPosition) obj; - if (!filename.equals(other.filename)) { - return false; - } - if (position != other.position) { - return false; - } - return true; - } - } - - @FunctionalInterface - private interface BinlogChangeEmitter { - void emit(TableId tableId, T data) throws InterruptedException; - } - - public MySqlStreamingChangeEventSource( - MySqlConnectorConfig connectorConfig, - MySqlConnection connection, - EventDispatcher dispatcher, - ErrorHandler errorHandler, - Clock clock, - MySqlTaskContext taskContext, - MySqlStreamingChangeEventSourceMetrics metrics) { - - this.taskContext = taskContext; - this.connectorConfig = connectorConfig; - this.connection = connection; - this.clock = clock; - this.eventDispatcher = dispatcher; - this.errorHandler = errorHandler; - this.metrics = metrics; - - eventDeserializationFailureHandlingMode = - connectorConfig.getEventProcessingFailureHandlingMode(); - 
inconsistentSchemaHandlingMode = connectorConfig.inconsistentSchemaFailureHandlingMode(); - - // Set up the log reader ... - client = taskContext.getBinaryLogClient(); - // BinaryLogClient will overwrite thread names later - client.setThreadFactory( - Threads.threadFactory( - MySqlConnector.class, - connectorConfig.getLogicalName(), - "binlog-client", - false, - false, - x -> binaryLogClientThreads.put(x.getName(), x))); - client.setServerId(connectorConfig.serverId()); - client.setSSLMode(sslModeFor(connectorConfig.sslMode())); - if (connectorConfig.sslModeEnabled()) { - SSLSocketFactory sslSocketFactory = - getBinlogSslSocketFactory(connectorConfig, connection); - if (sslSocketFactory != null) { - client.setSslSocketFactory(sslSocketFactory); - } - } - Configuration configuration = connectorConfig.getConfig(); - client.setKeepAlive(configuration.getBoolean(MySqlConnectorConfig.KEEP_ALIVE)); - final long keepAliveInterval = - configuration.getLong(MySqlConnectorConfig.KEEP_ALIVE_INTERVAL_MS); - client.setKeepAliveInterval(keepAliveInterval); - // Considering heartbeatInterval should be less than keepAliveInterval, we use the - // heartbeatIntervalFactor - // multiply by keepAliveInterval and set the result value to heartbeatInterval.The default - // value of heartbeatIntervalFactor - // is 0.8, and we believe the left time (0.2 * keepAliveInterval) is enough to process the - // packet received from the MySQL server. - client.setHeartbeatInterval((long) (keepAliveInterval * heartbeatIntervalFactor)); - - boolean filterDmlEventsByGtidSource = - configuration.getBoolean(MySqlConnectorConfig.GTID_SOURCE_FILTER_DML_EVENTS); - gtidDmlSourceFilter = - filterDmlEventsByGtidSource ? connectorConfig.gtidSourceFilter() : null; - - // Set up the event deserializer with additional type(s) ... - final Map tableMapEventByTableId = - new HashMap(); - EventDeserializer eventDeserializer = - new EventDeserializer() { - @Override - public Event nextEvent(ByteArrayInputStream inputStream) throws IOException { - try { - // Delegate to the superclass ... - Event event = super.nextEvent(inputStream); - - // We have to record the most recent TableMapEventData for each table - // number for our custom deserializers ... - if (event.getHeader().getEventType() == EventType.TABLE_MAP) { - TableMapEventData tableMapEvent = event.getData(); - tableMapEventByTableId.put( - tableMapEvent.getTableId(), tableMapEvent); - } - - // DBZ-5126 Clean cache on rotate event to prevent it from growing - // indefinitely. 
- if (event.getHeader().getEventType() == EventType.ROTATE) { - tableMapEventByTableId.clear(); - } - return event; - } - // DBZ-217 In case an event couldn't be read we create a pseudo-event for - // the sake of logging - catch (EventDataDeserializationException edde) { - // DBZ-3095 As of Java 15, when reaching EOF in the binlog stream, the - // polling loop in - // BinaryLogClient#listenForEventPackets() keeps returning values != -1 - // from peek(); - // this causes the loop to never finish - // Propagating the exception (either EOF or socket closed) causes the - // loop to be aborted - // in this case - if (edde.getCause() instanceof IOException) { - throw edde; - } - - EventHeaderV4 header = new EventHeaderV4(); - header.setEventType(EventType.INCIDENT); - header.setTimestamp(edde.getEventHeader().getTimestamp()); - header.setServerId(edde.getEventHeader().getServerId()); - - if (edde.getEventHeader() instanceof EventHeaderV4) { - header.setEventLength( - ((EventHeaderV4) edde.getEventHeader()).getEventLength()); - header.setNextPosition( - ((EventHeaderV4) edde.getEventHeader()).getNextPosition()); - header.setFlags(((EventHeaderV4) edde.getEventHeader()).getFlags()); - } - - EventData data = new EventDataDeserializationExceptionData(edde); - return new Event(header, data); - } - } - }; - - // Add our custom deserializers ... - eventDeserializer.setEventDataDeserializer(EventType.STOP, new StopEventDataDeserializer()); - eventDeserializer.setEventDataDeserializer(EventType.GTID, new GtidEventDataDeserializer()); - eventDeserializer.setEventDataDeserializer( - EventType.WRITE_ROWS, - new RowDeserializers.WriteRowsDeserializer(tableMapEventByTableId)); - eventDeserializer.setEventDataDeserializer( - EventType.UPDATE_ROWS, - new RowDeserializers.UpdateRowsDeserializer(tableMapEventByTableId)); - eventDeserializer.setEventDataDeserializer( - EventType.DELETE_ROWS, - new RowDeserializers.DeleteRowsDeserializer(tableMapEventByTableId)); - eventDeserializer.setEventDataDeserializer( - EventType.EXT_WRITE_ROWS, - new RowDeserializers.WriteRowsDeserializer(tableMapEventByTableId) - .setMayContainExtraInformation(true)); - eventDeserializer.setEventDataDeserializer( - EventType.EXT_UPDATE_ROWS, - new RowDeserializers.UpdateRowsDeserializer(tableMapEventByTableId) - .setMayContainExtraInformation(true)); - eventDeserializer.setEventDataDeserializer( - EventType.EXT_DELETE_ROWS, - new RowDeserializers.DeleteRowsDeserializer(tableMapEventByTableId) - .setMayContainExtraInformation(true)); - client.setEventDeserializer(eventDeserializer); - } - - protected void onEvent(MySqlOffsetContext offsetContext, Event event) { - long ts = 0; - - if (event.getHeader().getEventType() == EventType.HEARTBEAT) { - // HEARTBEAT events have no timestamp but are fired only when - // there is no traffic on the connection which means we are caught-up - // https://dev.mysql.com/doc/internals/en/heartbeat-event.html - metrics.setMilliSecondsBehindSource(ts); - return; - } - - // MySQL has seconds resolution but mysql-binlog-connector-java returns - // a value in milliseconds - long eventTs = event.getHeader().getTimestamp(); - - if (eventTs == 0) { - LOGGER.trace("Received unexpected event with 0 timestamp: {}", event); - return; - } - - ts = clock.currentTimeInMillis() - eventTs; - LOGGER.trace("Current milliseconds behind source: {} ms", ts); - metrics.setMilliSecondsBehindSource(ts); - } - - protected void ignoreEvent(MySqlOffsetContext offsetContext, Event event) { - LOGGER.trace("Ignoring event due to missing 
handler: {}", event); - } - - protected void handleEvent(MySqlOffsetContext offsetContext, Event event) { - if (event == null) { - return; - } - - final EventHeader eventHeader = event.getHeader(); - // Update the source offset info. Note that the client returns the value in *milliseconds*, - // even though the binlog - // contains only *seconds* precision ... - // HEARTBEAT events have no timestamp; only set the timestamp if the event is not a - // HEARTBEAT - eventTimestamp = - !eventHeader.getEventType().equals(EventType.HEARTBEAT) - ? Instant.ofEpochMilli(eventHeader.getTimestamp()) - : null; - offsetContext.setBinlogThread(eventHeader.getServerId()); - - final EventType eventType = eventHeader.getEventType(); - if (eventType == EventType.ROTATE) { - EventData eventData = event.getData(); - RotateEventData rotateEventData; - if (eventData instanceof EventDeserializer.EventDataWrapper) { - rotateEventData = - (RotateEventData) - ((EventDeserializer.EventDataWrapper) eventData).getInternal(); - } else { - rotateEventData = (RotateEventData) eventData; - } - offsetContext.setBinlogStartPoint( - rotateEventData.getBinlogFilename(), rotateEventData.getBinlogPosition()); - } else if (eventHeader instanceof EventHeaderV4) { - EventHeaderV4 trackableEventHeader = (EventHeaderV4) eventHeader; - offsetContext.setEventPosition( - trackableEventHeader.getPosition(), trackableEventHeader.getEventLength()); - } - - // If there is a handler for this event, forward the event to it ... - try { - // Forward the event to the handler ... - eventHandlers - .getOrDefault(eventType, (e) -> ignoreEvent(offsetContext, e)) - .accept(event); - - // Generate heartbeat message if the time is right - eventDispatcher.dispatchHeartbeatEvent(offsetContext); - - // Capture that we've completed another event ... - offsetContext.completeEvent(); - - if (skipEvent) { - // We're in the mode of skipping events and we just skipped this one, so decrement - // our skip count ... - --initialEventsToSkip; - skipEvent = initialEventsToSkip > 0; - } - } catch (RuntimeException e) { - // There was an error in the event handler, so propagate the failure to Kafka Connect - // ... - logStreamingSourceState(); - errorHandler.setProducerThrowable( - new DebeziumException("Error processing binlog event", e)); - // Do not stop the client, since Kafka Connect should stop the connector on it's own - // (and doing it here may cause problems the second time it is stopped). - // We can clear the listeners though so that we ignore all future events ... - eventHandlers.clear(); - LOGGER.info( - "Error processing binlog event, and propagating to Kafka Connect so it stops this connector. Future binlog events read before connector is shutdown will be ignored."); - } catch (InterruptedException e) { - // Most likely because this reader was stopped and our thread was interrupted ... - Thread.currentThread().interrupt(); - eventHandlers.clear(); - LOGGER.info("Stopped processing binlog events due to thread interruption"); - } - } - - @SuppressWarnings("unchecked") - protected T unwrapData(Event event) { - EventData eventData = event.getData(); - if (eventData instanceof EventDeserializer.EventDataWrapper) { - eventData = ((EventDeserializer.EventDataWrapper) eventData).getInternal(); - } - return (T) eventData; - } - - /** - * Handle the supplied event that signals that mysqld has stopped. 
- * - * @param event the server stopped event to be processed; may not be null - */ - protected void handleServerStop(MySqlOffsetContext offsetContext, Event event) { - LOGGER.debug("Server stopped: {}", event); - } - - /** - * Handle the supplied event that is sent by a primary to a replica to let the replica know that - * the primary is still alive. Not written to a binary log. - * - * @param event the server stopped event to be processed; may not be null - */ - protected void handleServerHeartbeat(MySqlOffsetContext offsetContext, Event event) { - LOGGER.trace("Server heartbeat: {}", event); - } - - /** - * Handle the supplied event that signals that an out of the ordinary event that occurred on the - * master. It notifies the replica that something happened on the primary that might cause data - * to be in an inconsistent state. - * - * @param event the server stopped event to be processed; may not be null - */ - protected void handleServerIncident(MySqlOffsetContext offsetContext, Event event) { - if (event.getData() instanceof EventDataDeserializationExceptionData) { - metrics.onErroneousEvent("source = " + event.toString()); - EventDataDeserializationExceptionData data = event.getData(); - - EventHeaderV4 eventHeader = - (EventHeaderV4) - data.getCause() - .getEventHeader(); // safe cast, instantiated that ourselves - - // logging some additional context but not the exception itself, this will happen in - // handleEvent() - if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.FAIL) { - LOGGER.error( - "Error while deserializing binlog event at offset {}.{}" - + "Use the mysqlbinlog tool to view the problematic event: mysqlbinlog --start-position={} --stop-position={} --verbose {}", - offsetContext.getOffset(), - System.lineSeparator(), - eventHeader.getPosition(), - eventHeader.getNextPosition(), - offsetContext.getSource().binlogFilename()); - - throw new RuntimeException(data.getCause()); - } else if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.WARN) { - LOGGER.warn( - "Error while deserializing binlog event at offset {}.{}" - + "This exception will be ignored and the event be skipped.{}" - + "Use the mysqlbinlog tool to view the problematic event: mysqlbinlog --start-position={} --stop-position={} --verbose {}", - offsetContext.getOffset(), - System.lineSeparator(), - System.lineSeparator(), - eventHeader.getPosition(), - eventHeader.getNextPosition(), - offsetContext.getSource().binlogFilename(), - data.getCause()); - } - } else { - LOGGER.error("Server incident: {}", event); - } - } - - /** - * Handle the supplied event with a {@link RotateEventData} that signals the logs are being - * rotated. This means that either the server was restarted, or the binlog has transitioned to a - * new file. In either case, subsequent table numbers will be different than those seen to this - * point. - * - * @param event the database change data event to be processed; may not be null - */ - protected void handleRotateLogsEvent(MySqlOffsetContext offsetContext, Event event) { - LOGGER.debug("Rotating logs: {}", event); - RotateEventData command = unwrapData(event); - assert command != null; - taskContext.getSchema().clearTableMappings(); - } - - /** - * Handle the supplied event with a {@link GtidEventData} that signals the beginning of a GTID - * transaction. 
We don't yet know whether this transaction contains any events we're interested - * in, but we have to record it so that we know the position of this event and know we've - * processed the binlog to this point. - * - *
Note that this captures the current GTID and complete GTID set, regardless of whether the - * connector is {@link MySqlTaskContext#gtidSourceFilter() filtering} the GTID set upon - * connection. We do this because we actually want to capture all GTID set values found in the - * binlog, whether or not we process them. However, only when we connect do we actually want to - * pass to MySQL only those GTID ranges that are applicable per the configuration. - * - * @param event the GTID event to be processed; may not be null - */ - protected void handleGtidEvent(MySqlOffsetContext offsetContext, Event event) { - LOGGER.debug("GTID transaction: {}", event); - GtidEventData gtidEvent = unwrapData(event); - String gtid = gtidEvent.getGtid(); - gtidSet.add(gtid); - offsetContext.startGtid(gtid, gtidSet.toString()); // rather than use the client's GTID set - ignoreDmlEventByGtidSource = false; - if (gtidDmlSourceFilter != null && gtid != null) { - String uuid = gtid.trim().substring(0, gtid.indexOf(":")); - if (!gtidDmlSourceFilter.test(uuid)) { - ignoreDmlEventByGtidSource = true; - } - } - metrics.onGtidChange(gtid); - } - - /** - * Handle the supplied event with an {@link RowsQueryEventData} by recording the original SQL - * query that generated the event. - * - * @param event the database change data event to be processed; may not be null - */ - protected void handleRowsQuery(MySqlOffsetContext offsetContext, Event event) { - // Unwrap the RowsQueryEvent - final RowsQueryEventData lastRowsQueryEventData = unwrapData(event); - - // Set the query on the source - offsetContext.setQuery(lastRowsQueryEventData.getQuery()); - } - - /** - * Handle the supplied event with an {@link QueryEventData} by possibly recording the DDL - * statements as changes in the MySQL schemas. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while recording the DDL statements - */ - protected void handleQueryEvent(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - QueryEventData command = unwrapData(event); - LOGGER.debug("Received query command: {}", event); - String sql = command.getSql().trim(); - if (sql.equalsIgnoreCase("BEGIN")) { - // We are starting a new transaction ... - offsetContext.startNextTransaction(); - eventDispatcher.dispatchTransactionStartedEvent( - offsetContext.getTransactionId(), offsetContext); - offsetContext.setBinlogThread(command.getThreadId()); - if (initialEventsToSkip != 0) { - LOGGER.debug( - "Restarting partially-processed transaction; change events will not be created for the first {} events plus {} more rows in the next event", - initialEventsToSkip, - startingRowNumber); - // We are restarting, so we need to skip the events in this transaction that we - // processed previously... - skipEvent = true; - } - return; - } - if (sql.equalsIgnoreCase("COMMIT")) { - handleTransactionCompletion(offsetContext, event); - return; - } - - String upperCasedStatementBegin = Strings.getBegin(sql, 7).toUpperCase(); - - if (upperCasedStatementBegin.startsWith("XA ")) { - // This is an XA transaction, and we currently ignore these and do nothing ... 
- return; - } - if (connectorConfig.getDdlFilter().test(sql)) { - LOGGER.debug("DDL '{}' was filtered out of processing", sql); - return; - } - if (upperCasedStatementBegin.equals("INSERT ") - || upperCasedStatementBegin.equals("UPDATE ") - || upperCasedStatementBegin.equals("DELETE ")) { - if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.FAIL) { - throw new DebeziumException( - "Received DML '" - + sql - + "' for processing, binlog probably contains events generated with statement or mixed based replication format"); - } else if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.WARN) { - LOGGER.warn( - "Warning only: Received DML '" - + sql - + "' for processing, binlog probably contains events generated with statement or mixed based replication format"); - return; - } else { - return; - } - } - if (sql.equalsIgnoreCase("ROLLBACK")) { - // We have hit a ROLLBACK which is not supported - LOGGER.warn( - "Rollback statements cannot be handled without binlog buffering, the connector will fail. Please check '{}' to see how to enable buffering", - MySqlConnectorConfig.BUFFER_SIZE_FOR_BINLOG_READER.name()); - } - - final List schemaChangeEvents = - taskContext - .getSchema() - .parseStreamingDdl( - sql, - command.getDatabase(), - offsetContext, - clock.currentTimeAsInstant()); - try { - for (SchemaChangeEvent schemaChangeEvent : schemaChangeEvents) { - if (taskContext.getSchema().skipSchemaChangeEvent(schemaChangeEvent)) { - continue; - } - - final TableId tableId = - schemaChangeEvent.getTables().isEmpty() - ? null - : schemaChangeEvent.getTables().iterator().next().id(); - eventDispatcher.dispatchSchemaChangeEvent( - tableId, - (receiver) -> { - try { - receiver.schemaChangeEvent(schemaChangeEvent); - } catch (Exception e) { - throw new DebeziumException(e); - } - }); - } - } catch (InterruptedException e) { - LOGGER.info("Processing interrupted"); - } - } - - private void handleTransactionCompletion(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - // We are completing the transaction ... - eventDispatcher.dispatchTransactionCommittedEvent(offsetContext); - offsetContext.commitTransaction(); - offsetContext.setBinlogThread(-1L); - skipEvent = false; - ignoreDmlEventByGtidSource = false; - } - - /** - * Handle a change in the table metadata. - * - *
This method should be called whenever we consume a TABLE_MAP event, and every transaction - * in the log should include one of these for each table affected by the transaction. Each table - * map event includes a monotonically-increasing numeric identifier, and this identifier is used - * within subsequent events within the same transaction. This table identifier can change when: - * - *
    - *
  1. the table structure is modified (e.g., via an {@code ALTER TABLE ...} command); or - *
  2. MySQL rotates to a new binary log file, even if the table structure does not change. - *
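For reference, a minimal sketch (editor's illustration, not part of this patch; it assumes mysql-binlog-connector-java and debezium-core on the classpath) of what the handler below derives from a TABLE_MAP event:

import com.github.shyiko.mysql.binlog.event.TableMapEventData;
import io.debezium.relational.TableId;

/** Editor's sketch: resolve a TABLE_MAP event to the table it announces. */
final class TableMapExample {

    /** The database/table named by the event; MySQL has no schema level, hence null. */
    static TableId toTableId(TableMapEventData metadata) {
        return new TableId(metadata.getDatabase(), null, metadata.getTable());
    }

    /** Transient numeric id, valid only until an ALTER TABLE or a binlog rotation. */
    static long tableNumber(TableMapEventData metadata) {
        return metadata.getTableId();
    }
}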
- * - * @param event the update event; never null - */ - protected void handleUpdateTableMetadata(MySqlOffsetContext offsetContext, Event event) { - TableMapEventData metadata = unwrapData(event); - long tableNumber = metadata.getTableId(); - String databaseName = metadata.getDatabase(); - String tableName = metadata.getTable(); - TableId tableId = new TableId(databaseName, null, tableName); - if (taskContext.getSchema().assignTableNumber(tableNumber, tableId)) { - LOGGER.debug("Received update table metadata event: {}", event); - } else { - informAboutUnknownTableIfRequired( - offsetContext, event, tableId, "update table metadata"); - } - } - - /** - * If we receive an event for a table that is monitored but whose metadata we don't know, either - * ignore that event or raise a warning or error as per the {@link - * MySqlConnectorConfig#INCONSISTENT_SCHEMA_HANDLING_MODE} configuration. - */ - private void informAboutUnknownTableIfRequired( - MySqlOffsetContext offsetContext, Event event, TableId tableId, String typeToLog) { - if (tableId != null - && connectorConfig.getTableFilters().dataCollectionFilter().isIncluded(tableId)) { - metrics.onErroneousEvent("source = " + tableId + ", event " + event); - EventHeaderV4 eventHeader = event.getHeader(); - - if (inconsistentSchemaHandlingMode == EventProcessingFailureHandlingMode.FAIL) { - LOGGER.error( - "Encountered change event '{}' at offset {} for table {} whose schema isn't known to this connector. One possible cause is an incomplete database history topic. Take a new snapshot in this case.{}" - + "Use the mysqlbinlog tool to view the problematic event: mysqlbinlog --start-position={} --stop-position={} --verbose {}", - event, - offsetContext.getOffset(), - tableId, - System.lineSeparator(), - eventHeader.getPosition(), - eventHeader.getNextPosition(), - offsetContext.getSource().binlogFilename()); - throw new DebeziumException( - "Encountered change event for table " - + tableId - + " whose schema isn't known to this connector"); - } else if (inconsistentSchemaHandlingMode == EventProcessingFailureHandlingMode.WARN) { - LOGGER.warn( - "Encountered change event '{}' at offset {} for table {} whose schema isn't known to this connector. One possible cause is an incomplete database history topic. Take a new snapshot in this case.{}" - + "The event will be ignored.{}" - + "Use the mysqlbinlog tool to view the problematic event: mysqlbinlog --start-position={} --stop-position={} --verbose {}", - event, - offsetContext.getOffset(), - tableId, - System.lineSeparator(), - System.lineSeparator(), - eventHeader.getPosition(), - eventHeader.getNextPosition(), - offsetContext.getSource().binlogFilename()); - } else { - LOGGER.debug( - "Encountered change event '{}' at offset {} for table {} whose schema isn't known to this connector. One possible cause is an incomplete database history topic. 
Take a new snapshot in this case.{}" - + "The event will be ignored.{}" - + "Use the mysqlbinlog tool to view the problematic event: mysqlbinlog --start-position={} --stop-position={} --verbose {}", - event, - offsetContext.getOffset(), - tableId, - System.lineSeparator(), - System.lineSeparator(), - eventHeader.getPosition(), - eventHeader.getNextPosition(), - offsetContext.getSource().binlogFilename()); - } - } else { - LOGGER.debug( - "Filtering {} event: {} for non-monitored table {}", typeToLog, event, tableId); - metrics.onFilteredEvent("source = " + tableId); - } - } - - /** - * Generate source records for the supplied event with an {@link WriteRowsEventData}. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while blocking - */ - protected void handleInsert(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - handleChange( - offsetContext, - event, - "insert", - WriteRowsEventData.class, - x -> taskContext.getSchema().getTableId(x.getTableId()), - WriteRowsEventData::getRows, - (tableId, row) -> - eventDispatcher.dispatchDataChangeEvent( - tableId, - new MySqlChangeRecordEmitter( - offsetContext, clock, Operation.CREATE, null, row))); - } - - /** - * Generate source records for the supplied event with an {@link UpdateRowsEventData}. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while blocking - */ - protected void handleUpdate(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - handleChange( - offsetContext, - event, - "update", - UpdateRowsEventData.class, - x -> taskContext.getSchema().getTableId(x.getTableId()), - UpdateRowsEventData::getRows, - (tableId, row) -> - eventDispatcher.dispatchDataChangeEvent( - tableId, - new MySqlChangeRecordEmitter( - offsetContext, - clock, - Operation.UPDATE, - row.getKey(), - row.getValue()))); - } - - /** - * Generate source records for the supplied event with an {@link DeleteRowsEventData}. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while blocking - */ - protected void handleDelete(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - handleChange( - offsetContext, - event, - "delete", - DeleteRowsEventData.class, - x -> taskContext.getSchema().getTableId(x.getTableId()), - DeleteRowsEventData::getRows, - (tableId, row) -> - eventDispatcher.dispatchDataChangeEvent( - tableId, - new MySqlChangeRecordEmitter( - offsetContext, clock, Operation.DELETE, row, null))); - } - - private void handleChange( - MySqlOffsetContext offsetContext, - Event event, - String changeType, - Class eventDataClass, - TableIdProvider tableIdProvider, - RowsProvider rowsProvider, - BinlogChangeEmitter changeEmitter) - throws InterruptedException { - if (skipEvent) { - // We can skip this because we should already be at least this far ... 
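// Illustrative note (editor's addition, not in the original source): skipEvent is only
// set while replaying a partially-processed transaction after a restart (see the BEGIN
// handling above, driven by initialEventsToSkip), so a restart recorded as, say, three
// events into a transaction drops those first three row events here instead of
// emitting them a second time.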
- LOGGER.info("Skipping previously processed row event: {}", event); - return; - } - if (ignoreDmlEventByGtidSource) { - LOGGER.debug("Skipping DML event because this GTID source is filtered: {}", event); - return; - } - final T data = unwrapData(event); - final TableId tableId = tableIdProvider.getTableId(data); - final List rows = rowsProvider.getRows(data); - - if (tableId != null && taskContext.getSchema().schemaFor(tableId) != null) { - int count = 0; - int numRows = rows.size(); - if (startingRowNumber < numRows) { - for (int row = startingRowNumber; row != numRows; ++row) { - offsetContext.setRowNumber(row, numRows); - offsetContext.event(tableId, eventTimestamp); - changeEmitter.emit(tableId, rows.get(row)); - count++; - } - if (LOGGER.isDebugEnabled()) { - if (startingRowNumber != 0) { - LOGGER.debug( - "Emitted {} {} record(s) for last {} row(s) in event: {}", - count, - changeType, - numRows - startingRowNumber, - event); - } else { - LOGGER.debug( - "Emitted {} {} record(s) for event: {}", count, changeType, event); - } - } - offsetContext.changeEventCompleted(); - } else { - // All rows were previously processed ... - LOGGER.debug("Skipping previously processed {} event: {}", changeType, event); - } - } else { - informAboutUnknownTableIfRequired(offsetContext, event, tableId, changeType + " row"); - } - startingRowNumber = 0; - } - - /** - * Handle a {@link EventType#VIEW_CHANGE} event. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while blocking - */ - protected void viewChange(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - LOGGER.debug("View Change event: {}", event); - // do nothing - } - - /** - * Handle a {@link EventType#XA_PREPARE} event. - * - * @param event the database change data event to be processed; may not be null - * @throws InterruptedException if this thread is interrupted while blocking - */ - protected void prepareTransaction(MySqlOffsetContext offsetContext, Event event) - throws InterruptedException { - LOGGER.debug("XA Prepare event: {}", event); - // do nothing - } - - private SSLMode sslModeFor(SecureConnectionMode mode) { - switch (mode) { - case DISABLED: - return SSLMode.DISABLED; - case PREFERRED: - return SSLMode.PREFERRED; - case REQUIRED: - return SSLMode.REQUIRED; - case VERIFY_CA: - return SSLMode.VERIFY_CA; - case VERIFY_IDENTITY: - return SSLMode.VERIFY_IDENTITY; - } - return null; - } - - @Override - public void execute(ChangeEventSourceContext context, MySqlOffsetContext offsetContext) - throws InterruptedException { - if (!connectorConfig.getSnapshotMode().shouldStream()) { - LOGGER.info( - "Streaming is disabled for snapshot mode {}", - connectorConfig.getSnapshotMode()); - return; - } - taskContext.getSchema().assureNonEmptySchema(); - final Set skippedOperations = connectorConfig.getSkippedOperations(); - - final MySqlOffsetContext effectiveOffsetContext = - offsetContext != null ? offsetContext : MySqlOffsetContext.initial(connectorConfig); - - // Register our event handlers ... 
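// Illustrative note (editor's addition, not in the original source): the handlers
// registered below are keyed by EventType and looked up in handleEvent() via
// eventHandlers.getOrDefault(eventType, e -> ignoreEvent(...)), so any event type
// without an entry is traced and dropped rather than failing the task.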
- eventHandlers.put( - EventType.STOP, (event) -> handleServerStop(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.HEARTBEAT, - (event) -> handleServerHeartbeat(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.INCIDENT, (event) -> handleServerIncident(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.ROTATE, (event) -> handleRotateLogsEvent(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.TABLE_MAP, - (event) -> handleUpdateTableMetadata(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.QUERY, (event) -> handleQueryEvent(effectiveOffsetContext, event)); - - if (!skippedOperations.contains(Operation.CREATE)) { - eventHandlers.put( - EventType.WRITE_ROWS, (event) -> handleInsert(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.EXT_WRITE_ROWS, - (event) -> handleInsert(effectiveOffsetContext, event)); - } - - if (!skippedOperations.contains(Operation.UPDATE)) { - eventHandlers.put( - EventType.UPDATE_ROWS, (event) -> handleUpdate(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.EXT_UPDATE_ROWS, - (event) -> handleUpdate(effectiveOffsetContext, event)); - } - - if (!skippedOperations.contains(Operation.DELETE)) { - eventHandlers.put( - EventType.DELETE_ROWS, (event) -> handleDelete(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.EXT_DELETE_ROWS, - (event) -> handleDelete(effectiveOffsetContext, event)); - } - - eventHandlers.put( - EventType.VIEW_CHANGE, (event) -> viewChange(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.XA_PREPARE, (event) -> prepareTransaction(effectiveOffsetContext, event)); - eventHandlers.put( - EventType.XID, - (event) -> handleTransactionCompletion(effectiveOffsetContext, event)); - - // Conditionally register ROWS_QUERY handler to parse SQL statements. - if (connectorConfig.includeSqlQuery()) { - eventHandlers.put( - EventType.ROWS_QUERY, - (event) -> handleRowsQuery(effectiveOffsetContext, event)); - } - - BinaryLogClient.EventListener listener; - if (connectorConfig.bufferSizeForStreamingChangeEventSource() == 0) { - listener = (event) -> handleEvent(effectiveOffsetContext, event); - } else { - EventBuffer buffer = - new EventBuffer( - connectorConfig.bufferSizeForStreamingChangeEventSource(), - this, - context); - listener = (event) -> buffer.add(effectiveOffsetContext, event); - } - client.registerEventListener(listener); - - client.registerLifecycleListener(new ReaderThreadLifecycleListener(effectiveOffsetContext)); - client.registerEventListener((event) -> onEvent(effectiveOffsetContext, event)); - if (LOGGER.isDebugEnabled()) { - client.registerEventListener((event) -> logEvent(effectiveOffsetContext, event)); - } - - final boolean isGtidModeEnabled = connection.isGtidModeEnabled(); - metrics.setIsGtidModeEnabled(isGtidModeEnabled); - - // Get the current GtidSet from MySQL so we can get a filtered/merged GtidSet based off of - // the last Debezium checkpoint. - String availableServerGtidStr = connection.knownGtidSet(); - if (isGtidModeEnabled) { - // The server is using GTIDs, so enable the handler ... - eventHandlers.put( - EventType.GTID, (event) -> handleGtidEvent(effectiveOffsetContext, event)); - - // Now look at the GTID set from the server and what we've previously seen ... 
- GtidSet availableServerGtidSet = new GtidSet(availableServerGtidStr); - - // also take into account purged GTID logs - GtidSet purgedServerGtidSet = connection.purgedGtidSet(); - LOGGER.info("GTID set purged on server: {}", purgedServerGtidSet); - - GtidSet filteredGtidSet = - filterGtidSet( - effectiveOffsetContext, availableServerGtidSet, purgedServerGtidSet); - if (filteredGtidSet != null) { - // We've seen at least some GTIDs, so start reading from the filtered GTID set ... - LOGGER.info("Registering binlog reader with GTID set: {}", filteredGtidSet); - String filteredGtidSetStr = filteredGtidSet.toString(); - client.setGtidSet(filteredGtidSetStr); - effectiveOffsetContext.setCompletedGtidSet(filteredGtidSetStr); - gtidSet = new com.github.shyiko.mysql.binlog.GtidSet(filteredGtidSetStr); - } else { - // We've not yet seen any GTIDs, so that means we have to start reading the binlog - // from the beginning ... - client.setBinlogFilename(effectiveOffsetContext.getSource().binlogFilename()); - client.setBinlogPosition(effectiveOffsetContext.getSource().binlogPosition()); - gtidSet = new com.github.shyiko.mysql.binlog.GtidSet(""); - } - } else { - // The server is not using GTIDs, so start reading the binlog based upon where we last - // left off ... - client.setBinlogFilename(effectiveOffsetContext.getSource().binlogFilename()); - client.setBinlogPosition(effectiveOffsetContext.getSource().binlogPosition()); - } - - // We may be restarting in the middle of a transaction, so see how far into the transaction - // we have already processed... - initialEventsToSkip = effectiveOffsetContext.eventsToSkipUponRestart(); - LOGGER.info("Skip {} events on streaming start", initialEventsToSkip); - - // Set the starting row number, which is the next row number to be read ... - startingRowNumber = effectiveOffsetContext.rowsToSkipUponRestart(); - LOGGER.info("Skip {} rows on streaming start", startingRowNumber); - - // Only when we reach the first BEGIN event will we start to skip events ... - skipEvent = false; - - try { - // Start the log reader, which starts background threads ... - if (context.isRunning()) { - long timeout = connectorConfig.getConnectionTimeout().toMillis(); - long started = clock.currentTimeInMillis(); - try { - LOGGER.debug( - "Attempting to establish binlog reader connection with timeout of {} ms", - timeout); - client.connect(timeout); - // Need to wait for keepalive thread to be running, otherwise it can be left - // orphaned - // The problem is with timing. When the close is called too early after connect - // then - // the keepalive thread is not terminated - if (client.isKeepAlive()) { - LOGGER.info("Waiting for keepalive thread to start"); - final Metronome metronome = Metronome.parker(Duration.ofMillis(100), clock); - int waitAttempts = 50; - boolean keepAliveThreadRunning = false; - while (!keepAliveThreadRunning && waitAttempts-- > 0) { - for (Thread t : binaryLogClientThreads.values()) { - if (t.getName().startsWith(KEEPALIVE_THREAD_NAME) && t.isAlive()) { - LOGGER.info("Keepalive thread is running"); - keepAliveThreadRunning = true; - } - } - metronome.pause(); - } - } - } catch (TimeoutException e) { - // If the client thread is interrupted *before* the client could connect, the - // client throws a timeout exception - // The only way we can distinguish this is if we get the timeout exception - // before the specified timeout has - // elapsed, so we simply check this (within 10%) ... 
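// Illustrative note (editor's addition, not in the original source): with, say, a
// 30 s connection timeout, a TimeoutException surfacing after ~29 s (> 0.9 * timeout)
// is treated as a genuine connect timeout and rethrown below, whereas one surfacing
// after ~1 s is assumed to come from an interrupted, shutting-down client and is
// deliberately swallowed.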
- long duration = clock.currentTimeInMillis() - started; - if (duration > (0.9 * timeout)) { - double actualSeconds = TimeUnit.MILLISECONDS.toSeconds(duration); - throw new DebeziumException( - "Timed out after " - + actualSeconds - + " seconds while waiting to connect to MySQL at " - + connectorConfig.hostname() - + ":" - + connectorConfig.port() - + " with user '" - + connectorConfig.username() - + "'", - e); - } - // Otherwise, we were told to shutdown, so we don't care about the timeout - // exception - } catch (AuthenticationException e) { - throw new DebeziumException( - "Failed to authenticate to the MySQL database at " - + connectorConfig.hostname() - + ":" - + connectorConfig.port() - + " with user '" - + connectorConfig.username() - + "'", - e); - } catch (Throwable e) { - throw new DebeziumException( - "Unable to connect to the MySQL database at " - + connectorConfig.hostname() - + ":" - + connectorConfig.port() - + " with user '" - + connectorConfig.username() - + "': " - + e.getMessage(), - e); - } - } - while (context.isRunning()) { - Thread.sleep(100); - } - } finally { - try { - client.disconnect(); - } catch (Exception e) { - LOGGER.info("Exception while stopping binary log client", e); - } - } - } - - private SSLSocketFactory getBinlogSslSocketFactory( - MySqlConnectorConfig connectorConfig, MySqlConnection connection) { - String acceptedTlsVersion = connection.getSessionVariableForSslVersion(); - if (!isNullOrEmpty(acceptedTlsVersion)) { - SSLMode sslMode = sslModeFor(connectorConfig.sslMode()); - - // Keystore settings can be passed via system properties too so we need to read them - final String password = System.getProperty("javax.net.ssl.keyStorePassword"); - final String keyFilename = System.getProperty("javax.net.ssl.keyStore"); - KeyManager[] keyManagers = null; - if (keyFilename != null) { - final char[] passwordArray = (password == null) ? 
null : password.toCharArray(); - try { - KeyStore ks = KeyStore.getInstance("JKS"); - ks.load(new FileInputStream(keyFilename), passwordArray); - - KeyManagerFactory kmf = KeyManagerFactory.getInstance("NewSunX509"); - kmf.init(ks, passwordArray); - - keyManagers = kmf.getKeyManagers(); - } catch (KeyStoreException - | IOException - | CertificateException - | NoSuchAlgorithmException - | UnrecoverableKeyException e) { - throw new DebeziumException("Could not load keystore", e); - } - } - - // DBZ-1208 Resembles the logic from the upstream BinaryLogClient, only that - // the accepted TLS version is passed to the constructed factory - if (sslMode == SSLMode.PREFERRED || sslMode == SSLMode.REQUIRED) { - final KeyManager[] finalKMS = keyManagers; - return new DefaultSSLSocketFactory(acceptedTlsVersion) { - - @Override - protected void initSSLContext(SSLContext sc) throws GeneralSecurityException { - sc.init( - finalKMS, - new TrustManager[] { - new X509TrustManager() { - - @Override - public void checkClientTrusted( - X509Certificate[] x509Certificates, String s) - throws CertificateException {} - - @Override - public void checkServerTrusted( - X509Certificate[] x509Certificates, String s) - throws CertificateException {} - - @Override - public X509Certificate[] getAcceptedIssuers() { - return new X509Certificate[0]; - } - } - }, - null); - } - }; - } else { - return new DefaultSSLSocketFactory(acceptedTlsVersion); - } - } - - return null; - } - - private void logStreamingSourceState() { - logStreamingSourceState(Level.ERROR); - } - - protected void logEvent(MySqlOffsetContext offsetContext, Event event) { - LOGGER.trace("Received event: {}", event); - } - - private void logStreamingSourceState(Level severity) { - final Object position = - client == null - ? "N/A" - : client.getBinlogFilename() + "/" + client.getBinlogPosition(); - final String message = - "Error during binlog processing. Last offset stored = {}, binlog reader near position = {}"; - switch (severity) { - case WARN: - LOGGER.warn(message, lastOffset, position); - break; - case DEBUG: - LOGGER.debug(message, lastOffset, position); - break; - default: - LOGGER.error(message, lastOffset, position); - } - } - - /** - * Apply the include/exclude GTID source filters to the current {@link #source() GTID set} and - * merge them onto the currently available GTID set from a MySQL server. - * - *
The merging behavior of this method might seem a bit strange at first. It's required in - * order for Debezium to consume a MySQL binlog that has multi-source replication enabled, if a - * failover has to occur. In such a case, the server that Debezium is failed over to might have - * a different set of sources, but still include the sources required for Debezium to continue - * to function. MySQL does not allow downstream replicas to connect if the GTID set does not - * contain GTIDs for all channels that the server is replicating from, even if the server does - * have the data needed by the client. To get around this, we can have Debezium merge its GTID - * set with whatever is on the server, so that MySQL will allow it to connect. See DBZ-143 for details. - * - *
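A worked example of the merge described above (editor's illustration with made-up server UUIDs, not part of this patch; it mirrors the default branch of filterGtidSet below and assumes debezium-connector-mysql 1.9.7 on the classpath):

import io.debezium.connector.mysql.GtidSet;

public class GtidMergeExample {
    public static void main(String[] args) {
        // Debezium's last recorded offset only covers source A ...
        GtidSet fromOffset =
                new GtidSet("11111111-1111-1111-1111-111111111111:1-100");
        // ... but the server we failed over to also replicates source B.
        GtidSet onServer =
                new GtidSet("11111111-1111-1111-1111-111111111111:1-120,"
                        + "22222222-2222-2222-2222-222222222222:1-50");

        // with() replaces the ranges of any UUID present in its argument, so source A
        // keeps the connector's restart position (1-100) while source B is adopted from
        // the server (1-50); MySQL will accept the merged set on connect.
        GtidSet merged = onServer.with(fromOffset);
        System.out.println(merged);
    }
}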
This method does not mutate any state in the context. - * - * @param availableServerGtidSet the GTID set currently available in the MySQL server - * @param purgedServerGtid the GTID set already purged by the MySQL server - * @return A GTID set meant for consuming from a MySQL binlog; may return null if the SourceInfo - * has no GTIDs and therefore none were filtered - */ - public GtidSet filterGtidSet( - MySqlOffsetContext offsetContext, - GtidSet availableServerGtidSet, - GtidSet purgedServerGtid) { - String gtidStr = offsetContext.gtidSet(); - if (gtidStr == null) { - return null; - } - LOGGER.info("Attempting to generate a filtered GTID set"); - LOGGER.info("GTID set from previous recorded offset: {}", gtidStr); - GtidSet filteredGtidSet = new GtidSet(gtidStr); - Predicate gtidSourceFilter = connectorConfig.gtidSourceFilter(); - if (gtidSourceFilter != null) { - filteredGtidSet = filteredGtidSet.retainAll(gtidSourceFilter); - LOGGER.info( - "GTID set after applying GTID source includes/excludes to previous recorded offset: {}", - filteredGtidSet); - } - LOGGER.info("GTID set available on server: {}", availableServerGtidSet); - - GtidSet mergedGtidSet; - - if (connectorConfig.gtidNewChannelPosition() == GtidNewChannelPosition.EARLIEST) { - final GtidSet knownGtidSet = filteredGtidSet; - LOGGER.info("Using first available positions for new GTID channels"); - final GtidSet relevantAvailableServerGtidSet = - (gtidSourceFilter != null) - ? availableServerGtidSet.retainAll(gtidSourceFilter) - : availableServerGtidSet; - LOGGER.info( - "Relevant GTID set available on server: {}", relevantAvailableServerGtidSet); - - mergedGtidSet = - relevantAvailableServerGtidSet - .retainAll(uuid -> knownGtidSet.forServerWithId(uuid) != null) - .with(purgedServerGtid) - .with(filteredGtidSet); - } else { - mergedGtidSet = availableServerGtidSet.with(filteredGtidSet); - } - - LOGGER.info("Final merged GTID set to use when connecting to MySQL: {}", mergedGtidSet); - return mergedGtidSet; - } - - MySqlStreamingChangeEventSourceMetrics getMetrics() { - return metrics; - } - - void rewindBinaryLogClient(ChangeEventSourceContext context, BinlogPosition position) { - try { - if (context.isRunning()) { - LOGGER.debug("Rewinding binlog to position {}", position); - client.disconnect(); - client.setBinlogFilename(position.getFilename()); - client.setBinlogPosition(position.getPosition()); - client.connect(); - } - } catch (IOException e) { - LOGGER.error("Unexpected error when re-connecting to the MySQL binary log reader", e); - } - } - - BinlogPosition getCurrentBinlogPosition() { - return new BinlogPosition(client.getBinlogFilename(), client.getBinlogPosition()); - } - - /** - * Wraps the specified exception in a {@link DebeziumException}, ensuring that all useful state - * is captured inside the new exception's message. - * - * @param error the exception; may not be null - * @return the wrapped Kafka Connect exception - */ - protected DebeziumException wrap(Throwable error) { - assert error != null; - String msg = error.getMessage(); - if (error instanceof ServerException) { - ServerException e = (ServerException) error; - msg = msg + " Error code: " + e.getErrorCode() + "; SQLSTATE: " + e.getSqlState() + "."; - } else if (error instanceof SQLException) { - SQLException e = (SQLException) error; - msg = - e.getMessage() - + " Error code: " - + e.getErrorCode() - + "; SQLSTATE: " - + e.getSQLState() - + "."; - } - return new DebeziumException(msg, error); - } - - /** LifecycleListener for Reader Thread. 
*/ - protected final class ReaderThreadLifecycleListener implements LifecycleListener { - private final MySqlOffsetContext offsetContext; - - ReaderThreadLifecycleListener(MySqlOffsetContext offsetContext) { - this.offsetContext = offsetContext; - } - - @Override - public void onDisconnect(BinaryLogClient client) { - if (LOGGER.isInfoEnabled()) { - taskContext.temporaryLoggingContext( - connectorConfig, - "binlog", - () -> { - Map offset = lastOffset; - if (offset != null) { - LOGGER.info( - "Stopped reading binlog after {} events, last recorded offset: {}", - totalRecordCounter, - offset); - } else { - LOGGER.info( - "Stopped reading binlog after {} events, no new offset was recorded", - totalRecordCounter); - } - }); - } - } - - @Override - public void onConnect(BinaryLogClient client) { - // Set up the MDC logging context for this thread ... - taskContext.configureLoggingContext("binlog"); - - // The event row number will be used when processing the first event ... - LOGGER.info( - "Connected to MySQL binlog at {}:{}, starting at {}", - connectorConfig.hostname(), - connectorConfig.port(), - offsetContext); - } - - @Override - public void onCommunicationFailure(BinaryLogClient client, Exception ex) { - LOGGER.debug("A communication failure event arrived", ex); - logStreamingSourceState(); - try { - // Stop BinaryLogClient background threads - client.disconnect(); - } catch (final Exception e) { - LOGGER.debug("Exception while closing client", e); - } - errorHandler.setProducerThrowable(wrap(ex)); - } - - @Override - public void onEventDeserializationFailure(BinaryLogClient client, Exception ex) { - if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.FAIL) { - LOGGER.debug("A deserialization failure event arrived", ex); - logStreamingSourceState(); - errorHandler.setProducerThrowable(wrap(ex)); - } else if (eventDeserializationFailureHandlingMode - == EventProcessingFailureHandlingMode.WARN) { - LOGGER.warn("A deserialization failure event arrived", ex); - logStreamingSourceState(Level.WARN); - } else { - LOGGER.debug("A deserialization failure event arrived", ex); - logStreamingSourceState(Level.DEBUG); - } - } - } - - @FunctionalInterface - private interface TableIdProvider { - TableId getTableId(E data); - } - - @FunctionalInterface - private interface RowsProvider { - List getRows(E data); - } -} diff --git a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/antlr/listener/DefaultValueParserListener.java b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/antlr/listener/DefaultValueParserListener.java index 5aa9390854d..d96a7a5ebd4 100644 --- a/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/antlr/listener/DefaultValueParserListener.java +++ b/flink-connector-mysql-cdc/src/main/java/io/debezium/connector/mysql/antlr/listener/DefaultValueParserListener.java @@ -6,20 +6,17 @@ package io.debezium.connector.mysql.antlr.listener; -import io.debezium.connector.mysql.MySqlDefaultValueConverter; -import io.debezium.connector.mysql.MySqlValueConverters; -import io.debezium.ddl.parser.mysql.generated.MySqlParser.CurrentTimestampContext; -import io.debezium.ddl.parser.mysql.generated.MySqlParser.DefaultValueContext; +import io.debezium.ddl.parser.mysql.generated.MySqlParser; import io.debezium.ddl.parser.mysql.generated.MySqlParserBaseListener; import io.debezium.relational.ColumnEditor; import java.util.concurrent.atomic.AtomicReference; /** - * Copied from Debezium project(v1.6.4.Final) to fix + * Copied 
from Debezium project(v1.9.7.Final) to fix * https://github.com/ververica/flink-cdc-connectors/issues/1506. * - *
Line 66~77: use the actual default string value when the sql contains COLLATE. We should + *
Line 48~59: use the actual default string value when the sql contains COLLATE. We should * remove this class after we bumped a higher debezium version where the * https://issues.redhat.com/browse/DBZ-5587 has been fixed. */ @@ -28,32 +25,17 @@ public class DefaultValueParserListener extends MySqlParserBaseListener { private final ColumnEditor columnEditor; private final AtomicReference optionalColumn; - private final MySqlDefaultValueConverter defaultValueConverter; - - /** - * Whether to convert the column's default value into the corresponding schema type or not. This - * is done for column definitions of ALTER TABLE statements but not for CREATE TABLE. In case of - * the latter, the default value conversion is handled by the CREATE TABLE statement listener - * itself, as a default character set given at the table level might have to be applied. - */ - private final boolean convertDefault; - private boolean converted; public DefaultValueParserListener( - ColumnEditor columnEditor, - MySqlValueConverters converters, - AtomicReference optionalColumn, - boolean convertDefault) { + ColumnEditor columnEditor, AtomicReference optionalColumn) { this.columnEditor = columnEditor; - this.defaultValueConverter = new MySqlDefaultValueConverter(converters); this.optionalColumn = optionalColumn; - this.convertDefault = convertDefault; this.converted = false; } @Override - public void enterDefaultValue(DefaultValueContext ctx) { + public void enterDefaultValue(MySqlParser.DefaultValueContext ctx) { String sign = ""; if (ctx.NULL_LITERAL() != null) { return; @@ -64,10 +46,10 @@ public void enterDefaultValue(DefaultValueContext ctx) { if (ctx.constant() != null) { if (ctx.constant().stringLiteral() != null) { if (ctx.constant().stringLiteral().COLLATE() == null) { - columnEditor.defaultValue( + columnEditor.defaultValueExpression( sign + unquote(ctx.constant().stringLiteral().getText())); } else { - columnEditor.defaultValue( + columnEditor.defaultValueExpression( sign + unquote( ctx.constant() @@ -76,47 +58,42 @@ public void enterDefaultValue(DefaultValueContext ctx) { .getText())); } } else if (ctx.constant().decimalLiteral() != null) { - columnEditor.defaultValue(sign + ctx.constant().decimalLiteral().getText()); + columnEditor.defaultValueExpression( + sign + ctx.constant().decimalLiteral().getText()); } else if (ctx.constant().BIT_STRING() != null) { - columnEditor.defaultValue(unquoteBinary(ctx.constant().BIT_STRING().getText())); + columnEditor.defaultValueExpression( + unquoteBinary(ctx.constant().BIT_STRING().getText())); } else if (ctx.constant().booleanLiteral() != null) { - columnEditor.defaultValue(ctx.constant().booleanLiteral().getText()); + columnEditor.defaultValueExpression(ctx.constant().booleanLiteral().getText()); } else if (ctx.constant().REAL_LITERAL() != null) { - columnEditor.defaultValue(ctx.constant().REAL_LITERAL().getText()); + columnEditor.defaultValueExpression(ctx.constant().REAL_LITERAL().getText()); } } else if (ctx.currentTimestamp() != null && !ctx.currentTimestamp().isEmpty()) { if (ctx.currentTimestamp().size() > 1 || (ctx.ON() == null && ctx.UPDATE() == null)) { - final CurrentTimestampContext currentTimestamp = ctx.currentTimestamp(0); + final MySqlParser.CurrentTimestampContext currentTimestamp = + ctx.currentTimestamp(0); if (currentTimestamp.CURRENT_TIMESTAMP() != null || currentTimestamp.NOW() != null) { - columnEditor.defaultValue("1970-01-01 00:00:00"); + columnEditor.defaultValueExpression("1970-01-01 00:00:00"); } else { - 
columnEditor.defaultValue(currentTimestamp.getText()); + columnEditor.defaultValueExpression(currentTimestamp.getText()); } } } - convertDefaultValue(true); + exitDefaultValue(true); super.enterDefaultValue(ctx); } - public void convertDefaultValue(boolean skipIfUnknownOptional) { - // For CREATE TABLE are all column default values converted only after charset is known. - if (convertDefault) { - if (!converted && (optionalColumn.get() != null || !skipIfUnknownOptional)) { - convertDefaultValueToSchemaType(columnEditor); - converted = true; + public void exitDefaultValue(boolean skipIfUnknownOptional) { + boolean isOptionalColumn = optionalColumn.get() != null; + if (!converted && (isOptionalColumn || !skipIfUnknownOptional)) { + if (isOptionalColumn) { + columnEditor.optional(optionalColumn.get().booleanValue()); } + converted = true; } } - private void convertDefaultValueToSchemaType(ColumnEditor columnEditor) { - if (optionalColumn.get() != null) { - columnEditor.optional(optionalColumn.get().booleanValue()); - } - - defaultValueConverter.setColumnDefaultValue(columnEditor); - } - private String unquote(String stringLiteral) { return stringLiteral.substring(1, stringLiteral.length() - 1); } diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/LegacyMySqlSourceTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/LegacyMySqlSourceTest.java index e9c4135e9f5..8d55b2d3048 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/LegacyMySqlSourceTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/LegacyMySqlSourceTest.java @@ -805,6 +805,7 @@ public void go() throws Exception { if (useLegacyImplementation) { // should fail because user specifies to use the legacy implementation try { + source.close(); runThread.sync(); fail("Should fail."); } catch (Exception e) { @@ -906,6 +907,9 @@ public void go() throws Exception { "Retrieve schema history failed, the schema records for engine %s has been removed," + " this might because the debezium engine has been shutdown due to other errors.", engineInstanceName))); + } finally { + source.close(); + runThread.sync(); } } } diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReaderTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReaderTest.java index da15a4c34a0..3c307efc905 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReaderTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/BinlogSplitReaderTest.java @@ -365,6 +365,9 @@ public void testReadBinlogFromEarliestOffset() throws Exception { "+I[2003, user_24, Shanghai, 123567891234]" }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -397,6 +400,9 @@ public void testReadBinlogFromEarliestOffsetAfterSchemaChange() throws Exception assertThrows(Throwable.class, () -> readBinlogSplits(dataType, reader, 1)); Optional schemaOutOfSyncException = ExceptionUtils.findThrowable(throwable, SchemaOutOfSyncException.class); + + reader.close(); + assertTrue(schemaOutOfSyncException.isPresent()); assertEquals( "Internal schema representation is probably out of sync with real database schema. 
" @@ -454,6 +460,9 @@ public void testReadBinlogFromBinlogFilePosition() throws Exception { "+I[2003, user_24, Shanghai, 123567891234]" }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -507,6 +516,9 @@ public void testSkippingEvents() throws Exception { "+U[109, user_4, Pittsburgh, 123567891234]" }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -558,6 +570,9 @@ public void testReadBinlogFromGtidSet() throws Exception { "+I[2003, user_24, Shanghai, 123567891234]" }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -611,6 +626,9 @@ public void testReadBinlogFromTimestamp() throws Exception { "+I[2003, user_24, Shanghai, 123567891234]" }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -669,6 +687,9 @@ public void testReadBinlogFromTimestampAfterSchemaChange() throws Exception { "+U[103, user_3, Shanghai, 123567891234, 15213]", }; List actual = readBinlogSplits(dataType, reader, expected.length); + + reader.close(); + assertEqualsInOrder(Arrays.asList(expected), actual); } @@ -720,6 +741,7 @@ public void testHeartbeatEvent() throws Exception { }, DEFAULT_TIMEOUT, "Timeout waiting for heartbeat event"); + binlogReader.close(); } private BinlogSplitReader createBinlogReader(MySqlSourceConfig sourceConfig) { @@ -733,10 +755,8 @@ private MySqlBinlogSplit createBinlogSplit(MySqlSourceConfig sourceConfig) throw try (MySqlConnection jdbc = DebeziumUtils.createMySqlConnection(sourceConfig)) { Map tableSchemas = TableDiscoveryUtils.discoverSchemaForCapturedTables( - new MySqlPartition.Provider(sourceConfig.getMySqlConnectorConfig()) - .getPartitions() - .iterator() - .next(), + new MySqlPartition( + sourceConfig.getMySqlConnectorConfig().getLogicalName()), sourceConfig, jdbc); return MySqlBinlogSplit.fillTableSchemas( diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReaderTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReaderTest.java index d7a3c26304e..c03bedabf6b 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReaderTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/debezium/reader/SnapshotSplitReaderTest.java @@ -33,6 +33,7 @@ import com.ververica.cdc.connectors.mysql.testutils.RecordsFormatter; import com.ververica.cdc.connectors.mysql.testutils.UniqueDatabase; import io.debezium.connector.mysql.MySqlConnection; +import io.debezium.connector.mysql.MySqlPartition; import io.debezium.data.Envelope; import io.debezium.jdbc.JdbcConnection; import io.debezium.pipeline.EventDispatcher; @@ -468,8 +469,7 @@ public static MySqlSourceConfig getConfig(String[] captureTables, int splitSize) } private boolean executeSql(MySqlSourceConfig sourceConfig, String[] sqlStatements) { - JdbcConnection connection = DebeziumUtils.openJdbcConnection(sourceConfig); - try { + try (JdbcConnection connection = DebeziumUtils.openJdbcConnection(sourceConfig)) { connection.setAutoCommit(false); connection.execute(sqlStatements); connection.commit(); @@ -480,9 +480,9 @@ private boolean 
executeSql(MySqlSourceConfig sourceConfig, String[] sqlStatement return true; } - class MakeBinlogEventTaskContext extends StatefulTaskContext { + static class MakeBinlogEventTaskContext extends StatefulTaskContext { - private Supplier makeBinlogFunction; + private final Supplier makeBinlogFunction; public MakeBinlogEventTaskContext( MySqlSourceConfig sourceConfig, @@ -494,12 +494,14 @@ public MakeBinlogEventTaskContext( } @Override - public EventDispatcher.SnapshotReceiver getSnapshotReceiver() { - EventDispatcher.SnapshotReceiver snapshotReceiver = super.getSnapshotReceiver(); - return new EventDispatcher.SnapshotReceiver() { + public EventDispatcher.SnapshotReceiver getSnapshotReceiver() { + EventDispatcher.SnapshotReceiver snapshotReceiver = + super.getSnapshotReceiver(); + return new EventDispatcher.SnapshotReceiver() { @Override public void changeRecord( + MySqlPartition partition, DataCollectionSchema schema, Envelope.Operation operation, Object key, @@ -507,7 +509,8 @@ public void changeRecord( OffsetContext offset, ConnectHeaders headers) throws InterruptedException { - snapshotReceiver.changeRecord(schema, operation, key, value, offset, headers); + snapshotReceiver.changeRecord( + partition, schema, operation, key, value, offset, headers); } @Override diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/assigners/state/PendingSplitsStateSerializerTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/assigners/state/PendingSplitsStateSerializerTest.java index 5acc4e185cd..13b6027a48f 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/assigners/state/PendingSplitsStateSerializerTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/assigners/state/PendingSplitsStateSerializerTest.java @@ -255,6 +255,11 @@ public String defaultCharsetName() { return "UTF-8"; } + @Override + public String comment() { + return ""; + } + @Override public TableEditor edit() { throw new UnsupportedOperationException("Not implemented."); diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlRecordEmitterTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlRecordEmitterTest.java index 8630d2c103b..e647a2d6bb2 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlRecordEmitterTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlRecordEmitterTest.java @@ -27,14 +27,21 @@ import com.ververica.cdc.connectors.mysql.source.split.MySqlBinlogSplitState; import com.ververica.cdc.connectors.mysql.source.split.SourceRecords; import com.ververica.cdc.debezium.DebeziumDeserializationSchema; +import io.debezium.config.Configuration; +import io.debezium.connector.mysql.MySqlConnectorConfig; import io.debezium.heartbeat.Heartbeat; +import io.debezium.heartbeat.HeartbeatFactory; +import io.debezium.jdbc.JdbcConfiguration; +import io.debezium.relational.TableId; +import io.debezium.schema.TopicSelector; import io.debezium.util.SchemaNameAdjuster; import org.apache.kafka.connect.source.SourceRecord; import org.junit.Test; -import java.time.Duration; import java.util.Collections; +import static io.debezium.config.CommonConnectorConfig.TRANSACTION_TOPIC; +import static io.debezium.connector.mysql.MySqlConnectorConfig.SERVER_NAME; import static 
org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -43,12 +50,21 @@ public class MySqlRecordEmitterTest { @Test public void testHeartbeatEventHandling() throws Exception { - Heartbeat heartbeat = - Heartbeat.create( - Duration.ofMillis(100), - "fake-topic", - "fake-key", + Configuration dezConf = + JdbcConfiguration.create() + .with(Heartbeat.HEARTBEAT_INTERVAL, 100) + .with(TRANSACTION_TOPIC, "fake-topic") + .with(SERVER_NAME, "mysql_binlog_source") + .build(); + + MySqlConnectorConfig mySqlConfig = new MySqlConnectorConfig(dezConf); + HeartbeatFactory heartbeatFactory = + new HeartbeatFactory<>( + new MySqlConnectorConfig(dezConf), + TopicSelector.defaultSelector( + mySqlConfig, (id, prefix, delimiter) -> "fake-topic"), SchemaNameAdjuster.create()); + Heartbeat heartbeat = heartbeatFactory.createHeartbeat(); BinlogOffset fakeOffset = BinlogOffset.ofBinlogFilePosition("fake-file", 15213L); MySqlRecordEmitter recordEmitter = createRecordEmitter(); MySqlBinlogSplitState splitState = createBinlogSplitState(); diff --git a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReaderTest.java b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReaderTest.java index 597543826cd..7a91b6e5431 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReaderTest.java +++ b/flink-connector-mysql-cdc/src/test/java/com/ververica/cdc/connectors/mysql/source/reader/MySqlSourceReaderTest.java @@ -53,8 +53,8 @@ import com.ververica.cdc.debezium.DebeziumDeserializationSchema; import com.ververica.cdc.debezium.history.FlinkJsonTableChangeSerializer; import io.debezium.connector.mysql.MySqlConnection; -import io.debezium.document.Array; import io.debezium.connector.mysql.MySqlPartition; +import io.debezium.document.Array; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.TableId; import io.debezium.relational.history.HistoryRecord; @@ -117,10 +117,8 @@ public void testBinlogReadFailoverCrossTransaction() throws Exception { try (MySqlConnection jdbc = DebeziumUtils.createMySqlConnection(sourceConfig)) { Map tableSchemas = TableDiscoveryUtils.discoverSchemaForCapturedTables( - new MySqlPartition.Provider(sourceConfig.getMySqlConnectorConfig()) - .getPartitions() - .iterator() - .next(), + new MySqlPartition( + sourceConfig.getMySqlConnectorConfig().getLogicalName()), sourceConfig, jdbc); binlogSplit = diff --git a/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseConnection.java b/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseConnection.java index c6f22383342..91143417b74 100644 --- a/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseConnection.java +++ b/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseConnection.java @@ -25,6 +25,8 @@ /** {@link JdbcConnection} extension to be used with OceanBase server. 
*/ public class OceanBaseConnection extends JdbcConnection { + private static final String QUOTED_CHARACTER = "`"; + protected static final String URL_PATTERN = "jdbc:mysql://${hostname}:${port}/?useInformationSchema=true&nullCatalogMeansCurrent=false&useSSL=false&useUnicode=true&characterEncoding=UTF-8&characterSetResults=UTF-8&zeroDateTimeBehavior=convertToNull&connectTimeout=${connectTimeout}"; protected static final String DRIVER_CLASS_NAME = "com.mysql.jdbc.Driver"; @@ -39,8 +41,8 @@ public OceanBaseConnection( super( JdbcConfiguration.adapt(config(hostname, port, user, password, timeout)), factory(classLoader), - "`", - "`"); + QUOTED_CHARACTER, + QUOTED_CHARACTER); } public static Configuration config( diff --git a/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseTableSchema.java b/flink-connector-oceanbase-cdc/src/main/java/com/ververica/cdc/connectors/oceanbase/source/OceanBaseTableSchema.java deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/flink-connector-oracle-cdc/pom.xml b/flink-connector-oracle-cdc/pom.xml index 0e24731689c..47217337c31 100644 --- a/flink-connector-oracle-cdc/pom.xml +++ b/flink-connector-oracle-cdc/pom.xml @@ -154,6 +154,11 @@ under the License. test + + org.apache.commons + commons-lang3 + 3.7 + diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/OracleSource.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/OracleSource.java index 08d4aa71e46..d099246aca3 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/OracleSource.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/OracleSource.java @@ -166,6 +166,8 @@ public DebeziumSourceFunction build() { if (tableList != null) { props.setProperty("table.include.list", String.join(",", tableList)); } + // we need this in order not to lose any transaction during snapshot to streaming switch + props.setProperty("internal.log.mining.transaction.snapshot.boundary.mode", "all"); DebeziumOffset specificOffset = null; switch (startupOptions.startupMode) { diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleScanFetchTask.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleScanFetchTask.java index edc0924d89f..12a4971f455 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleScanFetchTask.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleScanFetchTask.java @@ -29,7 +29,7 @@ import io.debezium.connector.oracle.OracleConnectorConfig; import io.debezium.connector.oracle.OracleDatabaseSchema; import io.debezium.connector.oracle.OracleOffsetContext; -import io.debezium.connector.oracle.OracleValueConverters; +import io.debezium.connector.oracle.OraclePartition; import io.debezium.connector.oracle.logminer.LogMinerOracleOffsetContextLoader; import io.debezium.heartbeat.Heartbeat; import io.debezium.pipeline.EventDispatcher; @@ -39,19 +39,14 @@ import io.debezium.pipeline.spi.ChangeRecordEmitter; import io.debezium.pipeline.spi.OffsetContext; import io.debezium.pipeline.spi.SnapshotResult; -import io.debezium.relational.Column; import io.debezium.relational.RelationalSnapshotChangeEventSource; import io.debezium.relational.SnapshotChangeRecordEmitter; import 
io.debezium.relational.Table; import io.debezium.relational.TableId; -import io.debezium.relational.ValueConverter; import io.debezium.util.Clock; import io.debezium.util.ColumnUtils; import io.debezium.util.Strings; import io.debezium.util.Threads; -import org.apache.kafka.connect.data.Field; -import org.apache.kafka.connect.data.Schema; -import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.errors.ConnectException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,9 +101,11 @@ public void execute(Context context) throws Exception { split); SnapshotSplitChangeEventSourceContext changeEventSourceContext = new SnapshotSplitChangeEventSourceContext(); - SnapshotResult snapshotResult = + SnapshotResult snapshotResult = snapshotSplitReadTask.execute( - changeEventSourceContext, sourceFetchContext.getOffsetContext()); + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); final StreamSplit backfillBinlogSplit = createBackfillRedoLogSplit(changeEventSourceContext); @@ -121,7 +118,7 @@ public void execute(Context context) throws Exception { if (!binlogBackfillRequired) { dispatchBinlogEndEvent( backfillBinlogSplit, - ((OracleSourceFetchTaskContext) context).getOffsetContext().getPartition(), + sourceFetchContext.getPartition().getSourcePartition(), ((OracleSourceFetchTaskContext) context).getDispatcher()); taskRunning = false; return; @@ -132,6 +129,7 @@ public void execute(Context context) throws Exception { createBackfillRedoLogReadTask(backfillBinlogSplit, sourceFetchContext); backfillBinlogReadTask.execute( new SnapshotBinlogSplitChangeEventSourceContext(), + sourceFetchContext.getPartition(), sourceFetchContext.getOffsetContext()); } else { taskRunning = false; @@ -157,8 +155,6 @@ private RedoLogSplitReadTask createBackfillRedoLogReadTask( context.getSourceConfig().getDbzConnectorConfig(); final OffsetContext.Loader loader = new LogMinerOracleOffsetContextLoader(oracleConnectorConfig); - final OracleOffsetContext oracleOffsetContext = - loader.load(backfillBinlogSplit.getStartingOffset().getOffset()); // we should only capture events for the current table, // otherwise, we may not be able to find the corresponding schema Configuration dezConf = @@ -184,7 +180,7 @@ private RedoLogSplitReadTask createBackfillRedoLogReadTask( private void dispatchBinlogEndEvent( StreamSplit backFillBinlogSplit, Map sourcePartition, - JdbcSourceEventDispatcher eventDispatcher) + JdbcSourceEventDispatcher eventDispatcher) throws InterruptedException { eventDispatcher.dispatchWatermarkEvent( sourcePartition, @@ -194,7 +190,8 @@ private void dispatchBinlogEndEvent( } /** A wrapped task to fetch the snapshot split of a table.
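* The task emits LOW and HIGH watermark events around the table scan so that the change events read during the redo log backfill can be bounded to exactly this split.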
*/ - public static class OracleSnapshotSplitReadTask extends AbstractSnapshotChangeEventSource { + public static class OracleSnapshotSplitReadTask + extends AbstractSnapshotChangeEventSource { private static final Logger LOG = LoggerFactory.getLogger(OracleSnapshotSplitReadTask.class); @@ -205,19 +202,19 @@ public static class OracleSnapshotSplitReadTask extends AbstractSnapshotChangeEv private final OracleConnectorConfig connectorConfig; private final OracleDatabaseSchema databaseSchema; private final OracleConnection jdbcConnection; - private final JdbcSourceEventDispatcher dispatcher; + private final JdbcSourceEventDispatcher dispatcher; private final Clock clock; private final SnapshotSplit snapshotSplit; private final OracleOffsetContext offsetContext; - private final SnapshotProgressListener snapshotProgressListener; + private final SnapshotProgressListener snapshotProgressListener; public OracleSnapshotSplitReadTask( OracleConnectorConfig connectorConfig, OracleOffsetContext previousOffset, - SnapshotProgressListener snapshotProgressListener, + SnapshotProgressListener snapshotProgressListener, OracleDatabaseSchema databaseSchema, OracleConnection jdbcConnection, - JdbcSourceEventDispatcher dispatcher, + JdbcSourceEventDispatcher dispatcher, SnapshotSplit snapshotSplit) { super(connectorConfig, snapshotProgressListener); this.offsetContext = previousOffset; @@ -231,13 +228,15 @@ public OracleSnapshotSplitReadTask( } @Override - public SnapshotResult execute( - ChangeEventSourceContext context, OffsetContext previousOffset) + public SnapshotResult execute( + ChangeEventSourceContext context, + OraclePartition partition, + OracleOffsetContext previousOffset) throws InterruptedException { - SnapshottingTask snapshottingTask = getSnapshottingTask(previousOffset); - final SnapshotContext ctx; + SnapshottingTask snapshottingTask = getSnapshottingTask(partition, previousOffset); + final SnapshotContext ctx; try { - ctx = prepare(context); + ctx = prepare(partition); } catch (Exception e) { LOG.error("Failed to initialize snapshot context.", e); throw new RuntimeException(e); @@ -253,14 +252,13 @@ public SnapshotResult execute( } @Override - protected SnapshotResult doExecute( + protected SnapshotResult doExecute( ChangeEventSourceContext context, - OffsetContext previousOffset, + OracleOffsetContext previousOffset, SnapshotContext snapshotContext, SnapshottingTask snapshottingTask) throws Exception { - final RelationalSnapshotChangeEventSource.RelationalSnapshotContext ctx = - (RelationalSnapshotChangeEventSource.RelationalSnapshotContext) snapshotContext; + final OracleSnapshotContext ctx = (OracleSnapshotContext) snapshotContext; ctx.offset = offsetContext; final RedoLogOffset lowWatermark = currentRedoLogOffset(jdbcConnection); @@ -270,7 +268,10 @@ protected SnapshotResult doExecute( snapshotSplit); ((SnapshotSplitChangeEventSourceContext) (context)).setLowWatermark(lowWatermark); dispatcher.dispatchWatermarkEvent( - offsetContext.getPartition(), snapshotSplit, lowWatermark, WatermarkKind.LOW); + snapshotContext.partition.getSourcePartition(), + snapshotSplit, + lowWatermark, + WatermarkKind.LOW); LOG.info("Snapshot step 2 - Snapshotting data"); createDataEvents(ctx, snapshotSplit.getTableId()); @@ -282,34 +283,37 @@ protected SnapshotResult doExecute( snapshotSplit); ((SnapshotSplitChangeEventSourceContext) (context)).setHighWatermark(lowWatermark); dispatcher.dispatchWatermarkEvent( - offsetContext.getPartition(), snapshotSplit, highWatermark, WatermarkKind.HIGH); + 
snapshotContext.partition.getSourcePartition(), + snapshotSplit, + highWatermark, + WatermarkKind.HIGH); return SnapshotResult.completed(ctx.offset); } @Override - protected SnapshottingTask getSnapshottingTask(OffsetContext previousOffset) { + protected SnapshottingTask getSnapshottingTask( + OraclePartition partition, OracleOffsetContext previousOffset) { return new SnapshottingTask(false, true); } @Override - protected SnapshotContext prepare(ChangeEventSourceContext changeEventSourceContext) - throws Exception { - return new MySqlSnapshotContext(); + protected SnapshotContext prepare( + OraclePartition partition) throws Exception { + return new OracleSnapshotContext(partition); } - private static class MySqlSnapshotContext - extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext { + private static class OracleSnapshotContext + extends RelationalSnapshotChangeEventSource.RelationalSnapshotContext< + OraclePartition, OracleOffsetContext> { - public MySqlSnapshotContext() throws SQLException { - super(""); + public OracleSnapshotContext(OraclePartition partition) throws SQLException { + super(partition, ""); } } - private void createDataEvents( - RelationalSnapshotChangeEventSource.RelationalSnapshotContext snapshotContext, - TableId tableId) + private void createDataEvents(OracleSnapshotContext snapshotContext, TableId tableId) throws Exception { - EventDispatcher.SnapshotReceiver snapshotReceiver = + EventDispatcher.SnapshotReceiver snapshotReceiver = dispatcher.getSnapshotChangeEventReceiver(); LOG.debug("Snapshotting table {}", tableId); createDataEventsForTable( @@ -319,8 +323,8 @@ private void createDataEvents( /** Dispatches the data change events for the records of a single table. */ private void createDataEventsForTable( - RelationalSnapshotChangeEventSource.RelationalSnapshotContext snapshotContext, - EventDispatcher.SnapshotReceiver snapshotReceiver, + OracleSnapshotContext snapshotContext, + EventDispatcher.SnapshotReceiver snapshotReceiver, Table table) throws InterruptedException { @@ -360,12 +364,8 @@ private void createDataEventsForTable( while (rs.next()) { rows++; - final Object[] row = new Object[columnArray.getGreatestColumnPosition()]; - for (int i = 0; i < columnArray.getColumns().length; i++) { - Column actualColumn = table.columns().get(i); - row[columnArray.getColumns()[i].position() - 1] = - readField(rs, i + 1, actualColumn, table); - } + final Object[] row = + jdbcConnection.rowToArray(table, databaseSchema, rs, columnArray); if (logTimer.expired()) { long stop = clock.currentTimeInMillis(); LOG.info( @@ -373,10 +373,12 @@ private void createDataEventsForTable( rows, snapshotSplit.splitId(), Strings.duration(stop - exportStart)); - snapshotProgressListener.rowsScanned(table.id(), rows); + snapshotProgressListener.rowsScanned( + snapshotContext.partition, table.id(), rows); logTimer = getTableScanLogTimer(); } dispatcher.dispatchSnapshotEvent( + snapshotContext.partition, table.id(), getChangeRecordEmitter(snapshotContext, table.id(), row), snapshotReceiver); @@ -391,43 +393,25 @@ private void createDataEventsForTable( } } - protected ChangeRecordEmitter getChangeRecordEmitter( - SnapshotContext snapshotContext, TableId tableId, Object[] row) { + protected ChangeRecordEmitter getChangeRecordEmitter( + SnapshotContext snapshotContext, + TableId tableId, + Object[] row) { snapshotContext.offset.event(tableId, clock.currentTime()); - return new SnapshotChangeRecordEmitter(snapshotContext.offset, row, clock); + return new 
SnapshotChangeRecordEmitter<>( + snapshotContext.partition, snapshotContext.offset, row, clock); } private Threads.Timer getTableScanLogTimer() { return Threads.timer(clock, LOG_INTERVAL); } - - /** - * copied from - * io.debezium.connector.oracle.antlr.listener.ParserUtils#convertValueToSchemaType. - */ - private Object readField(ResultSet rs, int fieldNo, Column actualColumn, Table actualTable) - throws SQLException { - - OracleValueConverters oracleValueConverters = - new OracleValueConverters(connectorConfig, jdbcConnection); - - final SchemaBuilder schemaBuilder = oracleValueConverters.schemaBuilder(actualColumn); - if (schemaBuilder == null) { - return null; - } - Schema schema = schemaBuilder.build(); - Field field = new Field(actualColumn.name(), 1, schema); - final ValueConverter valueConverter = - oracleValueConverters.converter(actualColumn, field); - return valueConverter.convert(rs.getObject(fieldNo)); - } } /** * {@link ChangeEventSource.ChangeEventSourceContext} implementation that keeps low/high * watermark for each {@link SnapshotSplit}. */ - public class SnapshotSplitChangeEventSourceContext + public static class SnapshotSplitChangeEventSourceContext implements ChangeEventSource.ChangeEventSourceContext { private RedoLogOffset lowWatermark; diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleSourceFetchTaskContext.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleSourceFetchTaskContext.java index 0051f3c2121..b5cde0a6a3a 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleSourceFetchTaskContext.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleSourceFetchTaskContext.java @@ -35,6 +35,7 @@ import io.debezium.connector.oracle.OracleDatabaseSchema; import io.debezium.connector.oracle.OracleErrorHandler; import io.debezium.connector.oracle.OracleOffsetContext; +import io.debezium.connector.oracle.OraclePartition; import io.debezium.connector.oracle.OracleStreamingChangeEventSourceMetrics; import io.debezium.connector.oracle.OracleTaskContext; import io.debezium.connector.oracle.OracleTopicSelector; @@ -46,6 +47,7 @@ import io.debezium.pipeline.metrics.SnapshotChangeEventSourceMetrics; import io.debezium.pipeline.source.spi.EventMetadataProvider; import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.spi.Offsets; import io.debezium.relational.Table; import io.debezium.relational.TableId; import io.debezium.relational.Tables; @@ -71,10 +73,12 @@ public class OracleSourceFetchTaskContext extends JdbcSourceFetchTaskContext { private OracleDatabaseSchema databaseSchema; private OracleTaskContext taskContext; private OracleOffsetContext offsetContext; - private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; + private OraclePartition partition; + + private SnapshotChangeEventSourceMetrics snapshotChangeEventSourceMetrics; private OracleStreamingChangeEventSourceMetrics streamingChangeEventSourceMetrics; private TopicSelector topicSelector; - private JdbcSourceEventDispatcher dispatcher; + private JdbcSourceEventDispatcher dispatcher; private ChangeEventQueue queue; private OracleErrorHandler errorHandler; @@ -102,12 +106,13 @@ public void configure(SourceSplitBase sourceSplitBase) { this.offsetContext = loadStartingOffsetState( new LogMinerOracleOffsetContextLoader(connectorConfig), 
sourceSplitBase); + this.partition = new OraclePartition(connectorConfig.getLogicalName()); validateAndLoadDatabaseHistory(offsetContext, databaseSchema); this.taskContext = new OracleTaskContext(connectorConfig, databaseSchema); final int queueSize = sourceSplitBase.isSnapshotSplit() - ? Integer.MAX_VALUE + ? getSourceConfig().getSplitSize() : getSourceConfig().getDbzConnectorConfig().getMaxQueueSize(); this.queue = new ChangeEventQueue.Builder() @@ -123,7 +128,7 @@ public void configure(SourceSplitBase sourceSplitBase) { // .buffering() .build(); this.dispatcher = - new JdbcSourceEventDispatcher( + new JdbcSourceEventDispatcher<>( connectorConfig, topicSelector, databaseSchema, @@ -144,7 +149,7 @@ public void configure(SourceSplitBase sourceSplitBase) { (OracleStreamingChangeEventSourceMetrics) changeEventSourceMetricsFactory.getStreamingMetrics( taskContext, queue, metadataProvider); - this.errorHandler = new OracleErrorHandler(connectorConfig.getLogicalName(), queue); + this.errorHandler = new OracleErrorHandler(connectorConfig, queue); } @Override @@ -166,7 +171,7 @@ public OracleOffsetContext getOffsetContext() { return offsetContext; } - public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { + public SnapshotChangeEventSourceMetrics getSnapshotChangeEventSourceMetrics() { return snapshotChangeEventSourceMetrics; } @@ -190,7 +195,7 @@ public RowType getSplitType(Table table) { } @Override - public JdbcSourceEventDispatcher getDispatcher() { + public JdbcSourceEventDispatcher getDispatcher() { return dispatcher; } @@ -199,6 +204,10 @@ public ChangeEventQueue getQueue() { return queue; } + public OraclePartition getPartition() { + return partition; + } + @Override public Tables.TableFilter getTableFilter() { return getDbzConnectorConfig().getTableFilters().dataCollectionFilter(); @@ -211,22 +220,19 @@ public Offset getStreamOffset(SourceRecord sourceRecord) { /** Loads the connector's persistent offset (if present) via the given loader. */ private OracleOffsetContext loadStartingOffsetState( - OffsetContext.Loader loader, SourceSplitBase oracleSplit) { + OffsetContext.Loader loader, SourceSplitBase oracleSplit) { Offset offset = oracleSplit.isSnapshotSplit() ? RedoLogOffset.INITIAL_OFFSET : oracleSplit.asStreamSplit().getStartingOffset(); - OracleOffsetContext oracleOffsetContext = - (OracleOffsetContext) loader.load(offset.getOffset()); - - return oracleOffsetContext; + return loader.load(offset.getOffset()); } private void validateAndLoadDatabaseHistory( OracleOffsetContext offset, OracleDatabaseSchema schema) { schema.initializeStorage(); - schema.recover(offset); + schema.recover(Offsets.of(partition, offset)); } /** Copied from debezium for accessing here. 
*/ diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleStreamFetchTask.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleStreamFetchTask.java index 973690888a4..15b96bcab52 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleStreamFetchTask.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/reader/fetch/OracleStreamFetchTask.java @@ -28,6 +28,7 @@ import io.debezium.connector.oracle.OracleConnectorConfig; import io.debezium.connector.oracle.OracleDatabaseSchema; import io.debezium.connector.oracle.OracleOffsetContext; +import io.debezium.connector.oracle.OraclePartition; import io.debezium.connector.oracle.OracleStreamingChangeEventSourceMetrics; import io.debezium.connector.oracle.logminer.LogMinerStreamingChangeEventSource; import io.debezium.pipeline.ErrorHandler; @@ -69,7 +70,9 @@ public void execute(Context context) throws Exception { RedoLogSplitChangeEventSourceContext changeEventSourceContext = new RedoLogSplitChangeEventSourceContext(); redoLogSplitReadTask.execute( - changeEventSourceContext, sourceFetchContext.getOffsetContext()); + changeEventSourceContext, + sourceFetchContext.getPartition(), + sourceFetchContext.getOffsetContext()); } @Override @@ -90,14 +93,14 @@ public static class RedoLogSplitReadTask extends LogMinerStreamingChangeEventSou private static final Logger LOG = LoggerFactory.getLogger(RedoLogSplitReadTask.class); private final StreamSplit redoLogSplit; - private final JdbcSourceEventDispatcher dispatcher; + private final JdbcSourceEventDispatcher dispatcher; private final ErrorHandler errorHandler; private ChangeEventSourceContext context; public RedoLogSplitReadTask( OracleConnectorConfig connectorConfig, OracleConnection connection, - JdbcSourceEventDispatcher dispatcher, + JdbcSourceEventDispatcher dispatcher, ErrorHandler errorHandler, OracleDatabaseSchema schema, Configuration jdbcConfig, @@ -118,14 +121,18 @@ public RedoLogSplitReadTask( } @Override - public void execute(ChangeEventSourceContext context, OracleOffsetContext offsetContext) { + public void execute( + ChangeEventSourceContext context, + OraclePartition partition, + OracleOffsetContext offsetContext) { this.context = context; - super.execute(context, offsetContext); + super.execute(context, partition, offsetContext); } @Override - public void afterHandleScn(OracleOffsetContext offsetContext) { - super.afterHandleScn(offsetContext); + protected void afterHandleScn( + OraclePartition partition, OracleOffsetContext offsetContext) { + super.afterHandleScn(partition, offsetContext); // check whether we need to stop fetching the redo log for the snapshot split.
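// for a bounded read (the redo log backfill of a snapshot split), an END watermark is emitted at the current redo log offset so that the snapshot task can finish.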
if (isBoundedRead()) { final RedoLogOffset currentRedoLogOffset = @@ -135,7 +142,7 @@ public void afterHandleScn(OracleOffsetContext offsetContext) { // send redo log end event try { dispatcher.dispatchWatermarkEvent( - offsetContext.getPartition(), + partition.getSourcePartition(), redoLogSplit, currentRedoLogOffset, WatermarkKind.END); diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleConnectionUtils.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleConnectionUtils.java index 7e926e9922c..3f08d44816e 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleConnectionUtils.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleConnectionUtils.java @@ -22,6 +22,7 @@ import io.debezium.config.Configuration; import io.debezium.connector.oracle.OracleConnection; import io.debezium.connector.oracle.Scn; +import io.debezium.jdbc.JdbcConfiguration; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.RelationalTableFilters; import io.debezium.relational.TableId; @@ -48,10 +49,15 @@ public class OracleConnectionUtils { private static final String SHOW_CURRENT_SCN = "SELECT CURRENT_SCN FROM V$DATABASE"; /** Creates a new {@link OracleConnection}, but does not open the connection. */ - public static OracleConnection createOracleConnection(Configuration dbzConfiguration) { + public static OracleConnection createOracleConnection(Configuration configuration) { + return createOracleConnection(JdbcConfiguration.adapt(configuration)); + } + + /** Creates a new {@link OracleConnection}, but does not open the connection. */ + public static OracleConnection createOracleConnection(JdbcConfiguration dbzConfiguration) { Configuration configuration = dbzConfiguration.subset(DATABASE_CONFIG_PREFIX, true); return new OracleConnection( - configuration.isEmpty() ? 
dbzConfiguration : JdbcConfiguration.adapt(configuration), OracleConnectionUtils.class::getClassLoader); } diff --git a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleUtils.java b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleUtils.java index 58fee6746cc..df51b6b323f 100644 --- a/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleUtils.java +++ b/flink-connector-oracle-cdc/src/main/java/com/ververica/cdc/connectors/oracle/source/utils/OracleUtils.java @@ -24,9 +24,11 @@ import io.debezium.connector.oracle.OracleConnection; import io.debezium.connector.oracle.OracleConnectorConfig; import io.debezium.connector.oracle.OracleDatabaseSchema; +import io.debezium.connector.oracle.OracleDefaultValueConverter; import io.debezium.connector.oracle.OracleTopicSelector; import io.debezium.connector.oracle.OracleValueConverters; import io.debezium.connector.oracle.StreamingAdapter; +import io.debezium.jdbc.JdbcConfiguration; import io.debezium.jdbc.JdbcConnection; import io.debezium.relational.Column; import io.debezium.relational.Table; @@ -269,11 +271,14 @@ public static OracleDatabaseSchema createOracleDatabaseSchema( // OracleConnectionUtils.createOracleConnection((Configuration) dbzOracleConfig); OracleValueConverters oracleValueConverters = new OracleValueConverters(dbzOracleConfig, oracleConnection); + OracleDefaultValueConverter defaultValueConverter = + new OracleDefaultValueConverter(oracleValueConverters, oracleConnection); StreamingAdapter.TableNameCaseSensitivity tableNameCaseSensitivity = dbzOracleConfig.getAdapter().getTableNameCaseSensitivity(oracleConnection); return new OracleDatabaseSchema( dbzOracleConfig, oracleValueConverters, + defaultValueConverter, schemaNameAdjuster, topicSelector, tableNameCaseSensitivity); @@ -285,9 +290,12 @@ public static OracleDatabaseSchema createOracleDatabaseSchema( TopicSelector topicSelector = OracleTopicSelector.defaultSelector(dbzOracleConfig); SchemaNameAdjuster schemaNameAdjuster = SchemaNameAdjuster.create(); OracleConnection oracleConnection = - OracleConnectionUtils.createOracleConnection((Configuration) dbzOracleConfig); + OracleConnectionUtils.createOracleConnection( + JdbcConfiguration.adapt((Configuration) dbzOracleConfig)); OracleValueConverters oracleValueConverters = new OracleValueConverters(dbzOracleConfig, oracleConnection); + OracleDefaultValueConverter defaultValueConverter = + new OracleDefaultValueConverter(oracleValueConverters, oracleConnection); StreamingAdapter.TableNameCaseSensitivity tableNameCaseSensitivity = tableIdCaseInsensitive ? StreamingAdapter.TableNameCaseSensitivity.SENSITIVE @@ -295,6 +303,7 @@ public static OracleDatabaseSchema createOracleDatabaseSchema( return new OracleDatabaseSchema( dbzOracleConfig, oracleValueConverters, + defaultValueConverter, schemaNameAdjuster, topicSelector, tableNameCaseSensitivity); diff --git a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/OracleErrorHandler.java b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/OracleErrorHandler.java deleted file mode 100644 index 01a696579f0..00000000000 --- a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/OracleErrorHandler.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2022 Ververica Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.debezium.connector.oracle; - -import io.debezium.connector.base.ChangeEventQueue; -import io.debezium.pipeline.ErrorHandler; - -import java.io.IOException; -import java.sql.SQLRecoverableException; -import java.util.ArrayList; -import java.util.List; - -/** - * Copied from https://github.com/debezium/debezium project to fix - * https://issues.redhat.com/browse/DBZ-4536 for 1.6.4.Final version. - * - *

This file is override to fix logger mining session stopped due to 'No more data to read from - * socket' exception. please see more discussion under - * https://github.com/debezium/debezium/pull/3118, We should remove this class since we bumped - * higher debezium version after 1.8.1.Final where the issue has been fixed. - */ -public class OracleErrorHandler extends ErrorHandler { - - private static final List retryOracleErrors = new ArrayList<>(); - private static final List retryOracleMessageContainsTexts = new ArrayList<>(); - - static { - // Contents of this list should only be ORA-xxxxx errors - // The error check uses starts-with semantics - retryOracleErrors.add("ORA-03135"); // connection lost - retryOracleErrors.add("ORA-12543"); // TNS:destination host unreachable - retryOracleErrors.add("ORA-00604"); // error occurred at recursive SQL level 1 - retryOracleErrors.add("ORA-01089"); // Oracle immediate shutdown in progress - retryOracleErrors.add("ORA-01333"); // Failed to establish LogMiner dictionary - retryOracleErrors.add("ORA-01284"); // Redo/Archive log cannot be opened, likely locked - retryOracleErrors.add( - "ORA-26653"); // Apply DBZXOUT did not start properly and is currently in state - // INITIAL - retryOracleErrors.add("ORA-01291"); // missing logfile - retryOracleErrors.add( - "ORA-01327"); // failed to exclusively lock system dictionary as required BUILD - retryOracleErrors.add("ORA-04030"); // out of process memory - - // Contents of this list should be any type of error message text - // The error check uses case-insensitive contains semantics - retryOracleMessageContainsTexts.add("No more data to read from socket"); - } - - public OracleErrorHandler(String logicalName, ChangeEventQueue queue) { - super(OracleConnector.class, logicalName, queue); - } - - @Override - protected boolean isRetriable(Throwable throwable) { - while (throwable != null) { - // Always retry any recoverable error - if (throwable instanceof SQLRecoverableException) { - return true; - } - - // If message is provided, run checks against it - final String message = throwable.getMessage(); - if (message != null && message.length() > 0) { - // Check Oracle error codes - for (String errorCode : retryOracleErrors) { - if (message.startsWith(errorCode)) { - return true; - } - } - // Check Oracle error message texts - for (String messageText : retryOracleMessageContainsTexts) { - if (message.toUpperCase().contains(messageText.toUpperCase())) { - return true; - } - } - } - - if (throwable.getCause() != null) { - // We explicitly check this below the top-level error as we only want - // certain nested exceptions to be retried, not if they're at the top - final Throwable cause = throwable.getCause(); - if (cause instanceof IOException) { - return true; - } - } - - throwable = throwable.getCause(); - } - return false; - } -} diff --git a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerAdapter.java b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerAdapter.java new file mode 100644 index 00000000000..53387e3cdbd --- /dev/null +++ b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerAdapter.java @@ -0,0 +1,448 @@ +/* + * Copyright 2023 Ververica Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.debezium.connector.oracle.logminer; + +import io.debezium.DebeziumException; +import io.debezium.config.Configuration; +import io.debezium.connector.oracle.AbstractStreamingAdapter; +import io.debezium.connector.oracle.OracleConnection; +import io.debezium.connector.oracle.OracleConnectorConfig; +import io.debezium.connector.oracle.OracleConnectorConfig.TransactionSnapshotBoundaryMode; +import io.debezium.connector.oracle.OracleDatabaseSchema; +import io.debezium.connector.oracle.OracleOffsetContext; +import io.debezium.connector.oracle.OraclePartition; +import io.debezium.connector.oracle.OracleStreamingChangeEventSourceMetrics; +import io.debezium.connector.oracle.OracleTaskContext; +import io.debezium.connector.oracle.Scn; +import io.debezium.document.Document; +import io.debezium.pipeline.ErrorHandler; +import io.debezium.pipeline.EventDispatcher; +import io.debezium.pipeline.source.snapshot.incremental.SignalBasedIncrementalSnapshotContext; +import io.debezium.pipeline.source.spi.StreamingChangeEventSource; +import io.debezium.pipeline.spi.OffsetContext; +import io.debezium.pipeline.txmetadata.TransactionContext; +import io.debezium.relational.RelationalSnapshotChangeEventSource.RelationalSnapshotContext; +import io.debezium.relational.TableId; +import io.debezium.relational.history.HistoryRecordComparator; +import io.debezium.util.Clock; +import io.debezium.util.HexConverter; +import io.debezium.util.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Copied from Debezium 1.9.7. + * + *

Line 356: Replace < condition with <= to be able to catch ongoing transactions during snapshot + * if current SCN points to START/INSERT/DELETE/UPDATE event. + */ +public class LogMinerAdapter extends AbstractStreamingAdapter { + + private static final Duration GET_TRANSACTION_SCN_PAUSE = Duration.ofSeconds(1); + + private static final int GET_TRANSACTION_SCN_ATTEMPTS = 5; + + private static final Logger LOGGER = LoggerFactory.getLogger(LogMinerAdapter.class); + + public static final String TYPE = "logminer"; + + public LogMinerAdapter(OracleConnectorConfig connectorConfig) { + super(connectorConfig); + } + + @Override + public String getType() { + return TYPE; + } + + @Override + public HistoryRecordComparator getHistoryRecordComparator() { + return new HistoryRecordComparator() { + @Override + protected boolean isPositionAtOrBefore(Document recorded, Document desired) { + return resolveScn(recorded).compareTo(resolveScn(desired)) < 1; + } + }; + } + + @Override + public OffsetContext.Loader getOffsetContextLoader() { + return new LogMinerOracleOffsetContextLoader(connectorConfig); + } + + @Override + public StreamingChangeEventSource getSource( + OracleConnection connection, + EventDispatcher dispatcher, + ErrorHandler errorHandler, + Clock clock, + OracleDatabaseSchema schema, + OracleTaskContext taskContext, + Configuration jdbcConfig, + OracleStreamingChangeEventSourceMetrics streamingMetrics) { + return new LogMinerStreamingChangeEventSource( + connectorConfig, + connection, + dispatcher, + errorHandler, + clock, + schema, + jdbcConfig, + streamingMetrics); + } + + @Override + public OracleOffsetContext determineSnapshotOffset( + RelationalSnapshotContext ctx, + OracleConnectorConfig connectorConfig, + OracleConnection connection) + throws SQLException { + + final Scn latestTableDdlScn = getLatestTableDdlScn(ctx, connection).orElse(null); + final String tableName = getTransactionTableName(connectorConfig); + + final Map pendingTransactions = new LinkedHashMap<>(); + + final Optional currentScn; + if (isPendingTransactionSkip(connectorConfig)) { + currentScn = getCurrentScn(latestTableDdlScn, connection); + } else { + currentScn = + getPendingTransactions( + latestTableDdlScn, connection, pendingTransactions, tableName); + } + + if (!currentScn.isPresent()) { + throw new DebeziumException("Failed to resolve current SCN"); + } + + // The provided snapshot connection already has an in-progress transaction with a save point + // that prevents switching from a PDB to the root CDB and if invoking the LogMiner APIs on + // such a connection, the use of commit/rollback by LogMiner will drop/invalidate the save + // point as well. A separate connection is necessary to preserve the save point. + try (OracleConnection conn = + new OracleConnection( + connection.config(), () -> getClass().getClassLoader(), false)) { + conn.setAutoCommit(false); + if (!Strings.isNullOrEmpty(connectorConfig.getPdbName())) { + // The next stage cannot be run within the PDB, reset the connection to the CDB. + conn.resetSessionToCdb(); + } + return determineSnapshotOffset( + connectorConfig, conn, currentScn.get(), pendingTransactions, tableName); + } + } + + private Optional getCurrentScn(Scn latestTableDdlScn, OracleConnection connection) + throws SQLException { + final String query = "SELECT CURRENT_SCN FROM V$DATABASE"; + + Scn currentScn; + do { + currentScn = + connection.queryAndMap( + query, rs -> rs.next() ? 
Scn.valueOf(rs.getString(1)) : Scn.NULL); + } while (areSameTimestamp(latestTableDdlScn, currentScn, connection)); + + return Optional.ofNullable(currentScn); + } + + private Optional getPendingTransactions( + Scn latestTableDdlScn, + OracleConnection connection, + Map transactions, + String transactionTableName) + throws SQLException { + final String query = + "SELECT d.CURRENT_SCN, t.XID, t.START_SCN " + + "FROM V$DATABASE d " + + "LEFT OUTER JOIN " + + transactionTableName + + " t " + + "ON t.START_SCN < d.CURRENT_SCN "; + + Scn currentScn = null; + do { + // Clear iterative state + currentScn = null; + transactions.clear(); + + try (Statement s = connection.connection().createStatement(); + ResultSet rs = s.executeQuery(query)) { + List results = new ArrayList<>(); + Statement s2 = connection.connection().createStatement(); + ResultSet rs2 = + s2.executeQuery( + "SELECT t.START_SCN, t.START_SCNB, t.DEPENDENT_SCN FROM V$TRANSACTION t"); + while (rs2.next()) { + results.add( + String.join( + " | ", rs2.getString(1), rs2.getString(2), rs2.getString(3))); + } + if (!results.isEmpty()) { + LOGGER.info("Found in-progress transactions in V$TRANSACTION: {}", results); + } + rs2.close(); + s2.close(); + + while (rs.next()) { + if (currentScn == null) { + // Only need to set this once per iteration + currentScn = Scn.valueOf(rs.getString(1)); + } + final String pendingTxStartScn = rs.getString(3); + if (!Strings.isNullOrEmpty(pendingTxStartScn)) { + // There is a pending transaction, capture state + transactions.put( + HexConverter.convertToHexString(rs.getBytes(2)), + Scn.valueOf(pendingTxStartScn)); + } + } + } catch (SQLException e) { + LOGGER.warn( + "Could not query the {} view: {}", transactionTableName, e.getMessage(), e); + throw e; + } + + } while (areSameTimestamp(latestTableDdlScn, currentScn, connection)); + + for (Map.Entry transaction : transactions.entrySet()) { + LOGGER.trace( + "\tPending Transaction '{}' started at SCN {}", + transaction.getKey(), + transaction.getValue()); + } + + return Optional.ofNullable(currentScn); + } + + private OracleOffsetContext determineSnapshotOffset( + OracleConnectorConfig connectorConfig, + OracleConnection connection, + Scn currentScn, + Map pendingTransactions, + String transactionTableName) + throws SQLException { + + if (isPendingTransactionSkip(connectorConfig)) { + LOGGER.info("\tNo in-progress transactions will be captured."); + } else if (isPendingTransactionViewOnly(connectorConfig)) { + LOGGER.info( + "\tSkipping transaction logs for resolving snapshot offset, only using {}.", + transactionTableName); + } else { + LOGGER.info( + "\tConsulting {} and transaction logs for resolving snapshot offset.", + transactionTableName); + getPendingTransactionsFromLogs(connection, currentScn, pendingTransactions); + } + + if (!pendingTransactions.isEmpty()) { + for (Map.Entry entry : pendingTransactions.entrySet()) { + LOGGER.info( + "\tFound in-progress transaction {}, starting at SCN {}", + entry.getKey(), + entry.getValue()); + } + } else if (!isPendingTransactionSkip(connectorConfig)) { + LOGGER.info("\tFound no in-progress transactions."); + } + + return OracleOffsetContext.create() + .logicalName(connectorConfig) + .scn(currentScn) + .snapshotScn(currentScn) + .snapshotPendingTransactions(pendingTransactions) + .transactionContext(new TransactionContext()) + .incrementalSnapshotContext(new SignalBasedIncrementalSnapshotContext<>()) + .build(); + } + + private void addLogsToSession(List logs, OracleConnection connection) + throws SQLException { + for (LogFile logFile : logs) { + 
LOGGER.debug("\tAdding log: {}", logFile.getFileName()); + connection.executeWithoutCommitting( + SqlUtils.addLogFileStatement("DBMS_LOGMNR.ADDFILE", logFile.getFileName())); + } + } + + private void startSession(OracleConnection connection) throws SQLException { + // We explicitly use the ONLINE data dictionary mode here. + // Since we are only concerned about non-SQL columns, it is safe to always use this mode + final String query = + "BEGIN sys.dbms_logmnr.start_logmnr(" + + "OPTIONS => DBMS_LOGMNR.DICT_FROM_ONLINE_CATALOG + DBMS_LOGMNR.NO_ROWID_IN_STMT);" + + "END;"; + LOGGER.debug("\tStarting mining session"); + connection.executeWithoutCommitting(query); + } + + private void stopSession(OracleConnection connection) throws SQLException { + // stop the current mining session + try { + LOGGER.debug("\tStopping mining session"); + connection.executeWithoutCommitting("BEGIN SYS.DBMS_LOGMNR.END_LOGMNR(); END;"); + } catch (SQLException e) { + if (e.getMessage().toUpperCase().contains("ORA-01307")) { + LOGGER.debug("LogMiner mining session is already closed."); + } else { + throw e; + } + } + } + + private Scn getOldestScnAvailableInLogs( + OracleConnectorConfig config, OracleConnection connection) throws SQLException { + final Duration archiveLogRetention = config.getLogMiningArchiveLogRetention(); + final String archiveLogDestinationName = config.getLogMiningArchiveDestinationName(); + return connection.queryAndMap( + SqlUtils.oldestFirstChangeQuery(archiveLogRetention, archiveLogDestinationName), + rs -> { + if (rs.next()) { + final String value = rs.getString(1); + if (!Strings.isNullOrEmpty(value)) { + return Scn.valueOf(value); + } + } + return Scn.NULL; + }); + } + + private List getOrderedLogsFromScn( + OracleConnectorConfig config, Scn sinceScn, OracleConnection connection) + throws SQLException { + return LogMinerHelper.getLogFilesForOffsetScn( + connection, + sinceScn, + config.getLogMiningArchiveLogRetention(), + config.isArchiveLogOnlyMode(), + config.getLogMiningArchiveDestinationName()) + .stream() + .sorted(Comparator.comparing(LogFile::getSequence)) + .collect(Collectors.toList()); + } + + private void getPendingTransactionsFromLogs( + OracleConnection connection, Scn currentScn, Map pendingTransactions) + throws SQLException { + final Scn oldestScn = getOldestScnAvailableInLogs(connectorConfig, connection); + final List logFiles = + getOrderedLogsFromScn(connectorConfig, oldestScn, connection); + if (!logFiles.isEmpty()) { + try { + addLogsToSession(getMostRecentLogFilesForSearch(logFiles), connection); + startSession(connection); + + LOGGER.info("\tQuerying transaction logs, please wait..."); + connection.query( + "SELECT START_SCN, XID FROM V$LOGMNR_CONTENTS WHERE OPERATION_CODE=7 AND SCN >= " + + currentScn + + " AND START_SCN <= " + + currentScn, + rs -> { + while (rs.next()) { + final String transactionId = + HexConverter.convertToHexString(rs.getBytes("XID")); + final String startScnStr = rs.getString("START_SCN"); + if (!Strings.isNullOrBlank(startScnStr)) { + final Scn startScn = Scn.valueOf(rs.getString("START_SCN")); + if (!pendingTransactions.containsKey(transactionId)) { + LOGGER.info( + "\tTransaction '{}' started at SCN '{}'", + transactionId, + startScn); + pendingTransactions.put(transactionId, startScn); + } + } + } + }); + } catch (Exception e) { + throw new DebeziumException("Failed to resolve snapshot offset", e); + } finally { + stopSession(connection); + } + } + } + + private List getMostRecentLogFilesForSearch(List allLogFiles) { + Map> 
recentLogsPerThread = new HashMap<>(); + for (LogFile logFile : allLogFiles) { + if (!recentLogsPerThread.containsKey(logFile.getThread())) { + if (logFile.isCurrent()) { + recentLogsPerThread.put(logFile.getThread(), new ArrayList<>()); + recentLogsPerThread.get(logFile.getThread()).add(logFile); + final Optional maxArchiveLogFile = + allLogFiles.stream() + .filter( + f -> + logFile.getThread() == f.getThread() + && logFile.getSequence() + .compareTo( + f.getSequence()) + > 0) + .max(Comparator.comparing(LogFile::getSequence)); + maxArchiveLogFile.ifPresent( + file -> recentLogsPerThread.get(logFile.getThread()).add(file)); + } + } + } + + final List logs = new ArrayList<>(); + for (Map.Entry> entry : recentLogsPerThread.entrySet()) { + logs.addAll(entry.getValue()); + } + return logs; + } + + private boolean isPendingTransactionSkip(OracleConnectorConfig config) { + return config.getLogMiningTransactionSnapshotBoundaryMode() + == TransactionSnapshotBoundaryMode.SKIP; + } + + public boolean isPendingTransactionViewOnly(OracleConnectorConfig config) { + return config.getLogMiningTransactionSnapshotBoundaryMode() + == TransactionSnapshotBoundaryMode.TRANSACTION_VIEW_ONLY; + } + + /** + * Under Oracle RAC, the V$ tables are specific the node that the JDBC connection is established + * to and not every V$ is synchronized across the cluster. Therefore, when Oracle RAC is in + * play, we should use the GV$ tables instead. + * + * @param config the connector configuration, should not be {@code null} + * @return the pending transaction table name + */ + private static String getTransactionTableName(OracleConnectorConfig config) { + if (config.getRacNodes() == null || config.getRacNodes().isEmpty()) { + return "V$TRANSACTION"; + } + return "GV$TRANSACTION"; + } +} diff --git a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerStreamingChangeEventSource.java b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerStreamingChangeEventSource.java index e5519d9313a..c813ab005d6 100644 --- a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerStreamingChangeEventSource.java +++ b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/LogMinerStreamingChangeEventSource.java @@ -22,12 +22,19 @@ import io.debezium.connector.oracle.OracleConnectorConfig; import io.debezium.connector.oracle.OracleDatabaseSchema; import io.debezium.connector.oracle.OracleOffsetContext; +import io.debezium.connector.oracle.OraclePartition; import io.debezium.connector.oracle.OracleStreamingChangeEventSourceMetrics; import io.debezium.connector.oracle.Scn; +import io.debezium.connector.oracle.logminer.logwriter.CommitLogWriterFlushStrategy; +import io.debezium.connector.oracle.logminer.logwriter.LogWriterFlushStrategy; +import io.debezium.connector.oracle.logminer.logwriter.RacCommitLogWriterFlushStrategy; +import io.debezium.connector.oracle.logminer.processor.LogMinerEventProcessor; import io.debezium.jdbc.JdbcConfiguration; import io.debezium.pipeline.ErrorHandler; import io.debezium.pipeline.EventDispatcher; import io.debezium.pipeline.source.spi.StreamingChangeEventSource; +import io.debezium.relational.Column; +import io.debezium.relational.Table; import io.debezium.relational.TableId; import io.debezium.util.Clock; import io.debezium.util.Metronome; @@ -36,12 +43,14 @@ import org.slf4j.LoggerFactory; import java.math.BigInteger; -import java.sql.PreparedStatement; -import java.sql.ResultSet; 
import java.sql.SQLException; +import java.text.DecimalFormat; import java.time.Duration; import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.temporal.ChronoUnit; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -50,36 +59,27 @@ import java.util.Set; import java.util.stream.Collectors; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.buildDataDictionary; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.checkSupplementalLogging; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.endMining; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.getCurrentRedoLogFiles; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.getEndScn; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.getFirstOnlineLogScn; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.getLastScnToAbandon; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.getSystime; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.instantiateFlushConnections; import static io.debezium.connector.oracle.logminer.LogMinerHelper.logError; import static io.debezium.connector.oracle.logminer.LogMinerHelper.setLogFilesForMining; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.setNlsSessionParameters; -import static io.debezium.connector.oracle.logminer.LogMinerHelper.startLogMining; /** - * A {@link StreamingChangeEventSource} based on Oracle's LogMiner utility. The event handler loop - * is executed in a separate executor, and add method afterHandleScn. + * Copied from Debezium 1.9.7. Diff: added afterHandleScn() method. A {@link + * StreamingChangeEventSource} based on Oracle's LogMiner utility. The event handler loop is + * executed in a separate executor. 
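+ * <p>The {@code startScn} carried between mining sessions is an exclusive lower bound; for the
+ * first session it is derived from the snapshot SCN and from any transactions that were still
+ * pending when the snapshot was taken.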
*/ public class LogMinerStreamingChangeEventSource - implements StreamingChangeEventSource { + implements StreamingChangeEventSource { private static final Logger LOGGER = LoggerFactory.getLogger(LogMinerStreamingChangeEventSource.class); + private static final int MAXIMUM_NAME_LENGTH = 30; + private static final String ALL_COLUMN_LOGGING = "ALL COLUMN LOGGING"; + private static final int MINING_START_RETRIES = 5; private final OracleConnection jdbcConnection; - private final EventDispatcher dispatcher; + private final EventDispatcher dispatcher; private final Clock clock; private final OracleDatabaseSchema schema; - private final boolean isRac; - private final Set racHosts = new HashSet<>(); private final JdbcConfiguration jdbcConfiguration; private final OracleConnectorConfig.LogMiningStrategy strategy; private final ErrorHandler errorHandler; @@ -89,15 +89,20 @@ public class LogMinerStreamingChangeEventSource private final Duration archiveLogRetention; private final boolean archiveLogOnlyMode; private final String archiveDestinationName; + private final int logFileQueryMaxRetries; + private final Duration initialDelay; + private final Duration maxDelay; - private Scn startScn; + private Scn startScn; // startScn is the **exclusive** lower bound for mining private Scn endScn; + private Scn snapshotScn; + private List currentLogFiles; private List currentRedoLogSequences; public LogMinerStreamingChangeEventSource( OracleConnectorConfig connectorConfig, OracleConnection jdbcConnection, - EventDispatcher dispatcher, + EventDispatcher dispatcher, ErrorHandler errorHandler, Clock clock, OracleDatabaseSchema schema, @@ -113,17 +118,12 @@ public LogMinerStreamingChangeEventSource( this.errorHandler = errorHandler; this.streamingMetrics = streamingMetrics; this.jdbcConfiguration = JdbcConfiguration.adapt(jdbcConfig); - this.isRac = connectorConfig.isRacSystem(); - if (this.isRac) { - this.racHosts.addAll( - connectorConfig.getRacNodes().stream() - .map(String::toUpperCase) - .collect(Collectors.toSet())); - instantiateFlushConnections(jdbcConfiguration, racHosts); - } this.archiveLogRetention = connectorConfig.getLogMiningArchiveLogRetention(); this.archiveLogOnlyMode = connectorConfig.isArchiveLogOnlyMode(); this.archiveDestinationName = connectorConfig.getLogMiningArchiveDestinationName(); + this.logFileQueryMaxRetries = connectorConfig.getMaximumNumberOfLogQueryRetries(); + this.initialDelay = connectorConfig.getLogMiningInitialDelay(); + this.maxDelay = connectorConfig.getLogMiningMaxDelay(); } /** @@ -132,20 +132,34 @@ public LogMinerStreamingChangeEventSource( * @param context change event source context */ @Override - public void execute(ChangeEventSourceContext context, OracleOffsetContext offsetContext) { - try (TransactionalBuffer transactionalBuffer = - new TransactionalBuffer( - connectorConfig, schema, clock, errorHandler, streamingMetrics)) { - try { - startScn = offsetContext.getScn(); - - if (!isContinuousMining - && startScn.compareTo( - getFirstOnlineLogScn( - jdbcConnection, - archiveLogRetention, - archiveDestinationName)) - < 0) { + public void execute( + ChangeEventSourceContext context, + OraclePartition partition, + OracleOffsetContext offsetContext) { + if (!connectorConfig.getSnapshotMode().shouldStream()) { + LOGGER.info("Streaming is not enabled in current configuration"); + return; + } + try { + // We explicitly expect auto-commit to be disabled + jdbcConnection.setAutoCommit(false); + + startScn = offsetContext.getScn(); + snapshotScn = 
offsetContext.getSnapshotScn(); + Scn firstScn = getFirstScnInLogs(jdbcConnection); + if (startScn.compareTo(snapshotScn) == 0) { + // This is the initial run of the streaming change event source. + // We need to compute the correct start offset for mining. That is not the snapshot + // offset, + // but the start offset of the oldest transaction that was still pending when the + // snapshot + // was taken. + computeStartScnForFirstMiningSession(offsetContext, firstScn); + } + + try (LogWriterFlushStrategy flushStrategy = resolveFlushStrategy()) { + if (!isContinuousMining && startScn.compareTo(firstScn.subtract(Scn.ONE)) < 0) { + // startScn is the exclusive lower bound, so must be >= (firstScn - 1) throw new DebeziumException( "Online REDO LOG files or archive log files do not contain the offset scn " + startScn @@ -153,206 +167,237 @@ public void execute(ChangeEventSourceContext context, OracleOffsetContext offset } setNlsSessionParameters(jdbcConnection); - checkSupplementalLogging(jdbcConnection, connectorConfig.getPdbName(), schema); + checkDatabaseAndTableState(jdbcConnection, connectorConfig.getPdbName(), schema); - if (archiveLogOnlyMode && !waitForStartScnInArchiveLogs(context, startScn)) { - return; - } + try (LogMinerEventProcessor processor = + createProcessor(context, partition, offsetContext)) { - initializeRedoLogsForMining(jdbcConnection, false, startScn); - - HistoryRecorder historyRecorder = connectorConfig.getLogMiningHistoryRecorder(); - - try { - // todo: why can't OracleConnection be used rather than a - // Factory+JdbcConfiguration? - historyRecorder.prepare( - streamingMetrics, - jdbcConfiguration, - connectorConfig.getLogMinerHistoryRetentionHours()); - - final LogMinerQueryResultProcessor processor = - new LogMinerQueryResultProcessor( - context, - connectorConfig, - streamingMetrics, - transactionalBuffer, - offsetContext, - schema, - dispatcher, - historyRecorder); - - final String query = - LogMinerQueryBuilder.build( - connectorConfig, schema, jdbcConnection.username()); - try (PreparedStatement miningView = - jdbcConnection - .connection() - .prepareStatement( - query, - ResultSet.TYPE_FORWARD_ONLY, - ResultSet.CONCUR_READ_ONLY, - ResultSet.HOLD_CURSORS_OVER_COMMIT)) { - - currentRedoLogSequences = getCurrentRedoLogSequences(); - Stopwatch stopwatch = Stopwatch.reusable(); - while (context.isRunning()) { - // Calculate time difference before each mining session to detect time - // zone offset changes (e.g. DST) on database server - streamingMetrics.calculateTimeDifference(getSystime(jdbcConnection)); - - if (archiveLogOnlyMode - && !waitForStartScnInArchiveLogs(context, startScn)) { - break; - } + if (archiveLogOnlyMode && !waitForStartScnInArchiveLogs(context, startScn)) { + return; + } - Instant start = Instant.now(); - endScn = - getEndScn( - jdbcConnection, - startScn, - endScn, - streamingMetrics, - connectorConfig.getLogMiningBatchSizeDefault(), - connectorConfig.isLobEnabled(), - connectorConfig.isArchiveLogOnlyMode(), - connectorConfig.getLogMiningArchiveDestinationName()); - - // This is a small window where when archive log only mode has - // completely caught up to the last - // record in the archive logs that both the start and end values are - // identical. In this use - // case we want to pause and restart the loop waiting for a new archive - // log before proceeding. 
- if (archiveLogOnlyMode && startScn.equals(endScn)) { - pauseBetweenMiningSessions(); - continue; - } + initializeRedoLogsForMining(jdbcConnection, false, startScn); - if (hasLogSwitchOccurred()) { - // This is the way to mitigate PGA leaks. - // With one mining session, it grows and maybe there is another way - // to flush PGA. - // At this point we use a new mining session - LOGGER.trace( - "Ending log mining startScn={}, endScn={}, offsetContext.getScn={}, strategy={}, continuous={}", - startScn, - endScn, - offsetContext.getScn(), - strategy, - isContinuousMining); - endMining(jdbcConnection); - - initializeRedoLogsForMining(jdbcConnection, true, startScn); - - abandonOldTransactionsIfExist( - jdbcConnection, offsetContext, transactionalBuffer); - - // This needs to be re-calculated because building the data - // dictionary will force the - // current redo log sequence to be advanced due to a complete log - // switch of all logs. - currentRedoLogSequences = getCurrentRedoLogSequences(); - } + int retryAttempts = 1; + Stopwatch sw = Stopwatch.accumulating().start(); + while (context.isRunning()) { + // Calculate time difference before each mining session to detect time zone + // offset changes (e.g. DST) on database server + streamingMetrics.calculateTimeDifference( + getDatabaseSystemTime(jdbcConnection)); - startLogMining( - jdbcConnection, - startScn, - endScn, - strategy, - isContinuousMining, - streamingMetrics); - - LOGGER.trace( - "Fetching LogMiner view results SCN {} to {}", - startScn, - endScn); - stopwatch.start(); - miningView.setFetchSize(connectorConfig.getMaxQueueSize()); - miningView.setFetchDirection(ResultSet.FETCH_FORWARD); - miningView.setString(1, startScn.toString()); - miningView.setString(2, endScn.toString()); - try (ResultSet rs = miningView.executeQuery()) { - Duration lastDurationOfBatchCapturing = - stopwatch.stop().durations().statistics().getTotal(); - streamingMetrics.setLastDurationOfBatchCapturing( - lastDurationOfBatchCapturing); - processor.processResult(rs); - if (connectorConfig.isLobEnabled()) { - startScn = - transactionalBuffer.updateOffsetContext( - offsetContext, dispatcher); - } else { - - final Scn lastProcessedScn = processor.getLastProcessedScn(); - if (!lastProcessedScn.isNull() - && lastProcessedScn.compareTo(endScn) < 0) { - // If the last processed SCN is before the endScn we need to - // use the last processed SCN as the - // next starting point as the LGWR buffer didn't flush all - // entries from memory to disk yet. - endScn = lastProcessedScn; - } - - if (transactionalBuffer.isEmpty()) { - LOGGER.debug( - "Buffer is empty, updating offset SCN to {}", - endScn); - offsetContext.setScn(endScn); - } else { - final Scn minStartScn = transactionalBuffer.getMinimumScn(); - if (!minStartScn.isNull()) { - offsetContext.setScn( - minStartScn.subtract(Scn.valueOf(1))); - dispatcher.dispatchHeartbeatEvent(offsetContext); - } - } - startScn = endScn; - } + if (archiveLogOnlyMode + && !waitForStartScnInArchiveLogs(context, startScn)) { + break; + } + + Instant start = Instant.now(); + endScn = calculateEndScn(jdbcConnection, startScn, endScn); + + // This is a small window where when archive log only mode has completely + // caught up to the last + // record in the archive logs that both the start and end values are + // identical. In this use + // case we want to pause and restart the loop waiting for a new archive log + // before proceeding. 
+ if (archiveLogOnlyMode && startScn.equals(endScn)) { + pauseBetweenMiningSessions(); + continue; + } + + flushStrategy.flush(jdbcConnection.getCurrentScn()); + + boolean restartRequired = false; + if (connectorConfig.getLogMiningMaximumSession().isPresent()) { + final Duration totalDuration = + sw.stop().durations().statistics().getTotal(); + if (totalDuration.toMillis() + >= connectorConfig + .getLogMiningMaximumSession() + .get() + .toMillis()) { + LOGGER.info( + "LogMiner session has exceeded maximum session time of '{}', forcing restart.", + connectorConfig.getLogMiningMaximumSession()); + restartRequired = true; + } else { + // resume the existing stop watch, we haven't met the criteria yet + sw.start(); } + } - afterHandleScn(offsetContext); - streamingMetrics.setCurrentBatchProcessingTime( - Duration.between(start, Instant.now())); + if (restartRequired || hasLogSwitchOccurred()) { + // This is the way to mitigate PGA leaks. + // With one mining session, it grows and maybe there is another way to + // flush PGA. + // At this point we use a new mining session + endMiningSession(jdbcConnection, offsetContext); + initializeRedoLogsForMining(jdbcConnection, true, startScn); + + // log switch or restart required, re-create a new stop watch + sw = Stopwatch.accumulating().start(); + } + + if (context.isRunning()) { + if (!startMiningSession( + jdbcConnection, startScn, endScn, retryAttempts)) { + retryAttempts++; + } else { + retryAttempts = 1; + startScn = processor.process(partition, startScn, endScn); + streamingMetrics.setCurrentBatchProcessingTime( + Duration.between(start, Instant.now())); + captureSessionMemoryStatistics(jdbcConnection); + } pauseBetweenMiningSessions(); } + + afterHandleScn(partition, offsetContext); } - } finally { - historyRecorder.close(); } - } catch (Throwable t) { - logError(streamingMetrics, "Mining session stopped due to the {}", t); - errorHandler.setProducerThrowable(t); - } finally { + } + } catch (Throwable t) { + logError(streamingMetrics, "Mining session stopped due to the {}", t); + errorHandler.setProducerThrowable(t); + } finally { + LOGGER.info("startScn={}, endScn={}", startScn, endScn); + LOGGER.info("Streaming metrics dump: {}", streamingMetrics.toString()); + LOGGER.info("Offsets: {}", offsetContext); + } + } + + protected void afterHandleScn(OraclePartition partition, OracleOffsetContext offsetContext) {} + + /** + * Computes the start SCN for the first mining session. + * + *
<p>
Normally, this would be the snapshot SCN, but if there were pending transactions at the + * time the snapshot was taken, we'd miss the events in those transactions that have an SCN + * smaller than the snapshot SCN. + * + * @param offsetContext the offset context + * @param firstScn the oldest SCN still available in the REDO logs + */ + private void computeStartScnForFirstMiningSession( + OracleOffsetContext offsetContext, Scn firstScn) { + // This is the initial run of the streaming change event source. + // We need to compute the correct start offset for mining. That is not the snapshot offset, + // but the start offset of the oldest transaction that was still pending when the snapshot + // was taken. + Map snapshotPendingTransactions = + offsetContext.getSnapshotPendingTransactions(); + if (snapshotPendingTransactions == null || snapshotPendingTransactions.isEmpty()) { + // no pending transactions, we can start mining from the snapshot SCN + startScn = snapshotScn; + } else { + // find the oldest transaction we can still fully process, and start from there. + Scn minScn = snapshotScn; + for (Map.Entry entry : snapshotPendingTransactions.entrySet()) { + String transactionId = entry.getKey(); + Scn scn = entry.getValue(); + LOGGER.info( + "Transaction {} was pending across snapshot boundary. Start SCN = {}, snapshot SCN = {}", + transactionId, + scn, + startScn); + if (scn.compareTo(firstScn) < 0) { + LOGGER.warn( + "Transaction {} was still ongoing while snapshot was taken, but is no longer completely recorded in the archive logs. Events will be lost. Oldest SCN in logs = {}, TX start SCN = {}", + transactionId, + firstScn, + scn); + minScn = firstScn; + } else if (scn.compareTo(minScn) < 0) { + minScn = scn; + } + } + + // Make sure the commit SCN is at least the snapshot SCN - 1. + // This ensures we'll never emit events for transactions that were complete before the + // snapshot was + // taken. 
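/*
 * Illustrative aside, not part of this patch: a self-contained sketch of the start-SCN
 * selection performed by computeStartScnForFirstMiningSession(...) around this point in the
 * patch. Plain long values stand in for Debezium's Scn type, the commit-SCN adjustment that
 * follows is omitted, and the class/method names below are invented for illustration only.
 */
import java.util.Map;

final class FirstMiningStartScnSketch {
    static long resolve(long snapshotScn, long firstScnInLogs, Map<String, Long> pendingTxStartScns) {
        if (pendingTxStartScns.isEmpty()) {
            // No transactions were pending at snapshot time; mine from the snapshot SCN.
            return snapshotScn;
        }
        long minScn = snapshotScn;
        for (long txStartScn : pendingTxStartScns.values()) {
            if (txStartScn < firstScnInLogs) {
                // Transaction is no longer fully present in the logs; its events will be lost.
                minScn = firstScnInLogs;
            } else if (txStartScn < minScn) {
                minScn = txStartScn;
            }
        }
        // Start one SCN before the oldest still-recoverable pending transaction.
        return minScn <= snapshotScn ? minScn - 1 : snapshotScn;
    }
}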
+ if (offsetContext.getCommitScn().compareTo(snapshotScn) < 0) { + LOGGER.info( + "Setting commit SCN to {} (snapshot SCN - 1) to ensure we don't double-emit events from pre-snapshot transactions.", + snapshotScn.subtract(Scn.ONE)); + offsetContext + .getCommitScn() + .setCommitScnOnAllThreads(snapshotScn.subtract(Scn.ONE)); + } + + // set start SCN to minScn + if (minScn.compareTo(startScn) <= 0) { LOGGER.info( - "startScn={}, endScn={}, offsetContext.getScn()={}", + "Resetting start SCN from {} (snapshot SCN) to {} (start of oldest complete pending transaction)", startScn, - endScn, - offsetContext.getScn()); - LOGGER.info("Transactional buffer dump: {}", transactionalBuffer.toString()); - LOGGER.info("Streaming metrics dump: {}", streamingMetrics.toString()); + minScn); + startScn = minScn.subtract(Scn.ONE); } } + offsetContext.setScn(startScn); + } + + private void captureSessionMemoryStatistics(OracleConnection connection) throws SQLException { + long sessionUserGlobalAreaMemory = + connection.getSessionStatisticByName("session uga memory"); + long sessionUserGlobalAreaMaxMemory = + connection.getSessionStatisticByName("session uga memory max"); + streamingMetrics.setUserGlobalAreaMemory( + sessionUserGlobalAreaMemory, sessionUserGlobalAreaMaxMemory); + + long sessionProcessGlobalAreaMemory = + connection.getSessionStatisticByName("session pga memory"); + long sessionProcessGlobalAreaMaxMemory = + connection.getSessionStatisticByName("session pga memory max"); + streamingMetrics.setProcessGlobalAreaMemory( + sessionProcessGlobalAreaMemory, sessionProcessGlobalAreaMaxMemory); + + final DecimalFormat format = new DecimalFormat("#.##"); + LOGGER.debug( + "Oracle Session UGA {}MB (max = {}MB), PGA {}MB (max = {}MB)", + format.format(sessionUserGlobalAreaMemory / 1024.f / 1024.f), + format.format(sessionUserGlobalAreaMaxMemory / 1024.f / 1024.f), + format.format(sessionProcessGlobalAreaMemory / 1024.f / 1024.f), + format.format(sessionProcessGlobalAreaMaxMemory / 1024.f / 1024.f)); } - protected void afterHandleScn(OracleOffsetContext offsetContext) {} - - private void abandonOldTransactionsIfExist( - OracleConnection connection, - OracleOffsetContext offsetContext, - TransactionalBuffer transactionalBuffer) { - Duration transactionRetention = connectorConfig.getLogMiningTransactionRetention(); - if (!Duration.ZERO.equals(transactionRetention)) { - final Scn offsetScn = offsetContext.getScn(); - Optional lastScnToAbandonTransactions = - getLastScnToAbandon(connection, offsetScn, transactionRetention); - lastScnToAbandonTransactions.ifPresent( - thresholdScn -> { - transactionalBuffer.abandonLongTransactions(thresholdScn, offsetContext); - offsetContext.setScn(thresholdScn); - startScn = endScn; - }); + private LogMinerEventProcessor createProcessor( + ChangeEventSourceContext context, + OraclePartition partition, + OracleOffsetContext offsetContext) { + final OracleConnectorConfig.LogMiningBufferType bufferType = + connectorConfig.getLogMiningBufferType(); + return bufferType.createProcessor( + context, + connectorConfig, + jdbcConnection, + dispatcher, + partition, + offsetContext, + schema, + streamingMetrics); + } + + /** + * Gets the first system change number in both archive and redo logs. 
+ * + * @param connection database connection, should not be {@code null} + * @return the oldest system change number + * @throws SQLException if a database exception occurred + * @throws DebeziumException if the oldest system change number cannot be found due to no logs + * available + */ + private Scn getFirstScnInLogs(OracleConnection connection) throws SQLException { + String oldestScn = + connection.singleOptionalValue( + SqlUtils.oldestFirstChangeQuery( + archiveLogRetention, archiveDestinationName), + rs -> rs.getString(1)); + if (oldestScn == null) { + throw new DebeziumException("Failed to calculate oldest SCN available in logs"); } + LOGGER.trace("Oldest SCN in logs is '{}'", oldestScn); + return Scn.valueOf(oldestScn); } private void initializeRedoLogsForMining( @@ -363,26 +408,110 @@ private void initializeRedoLogsForMining( buildDataDictionary(connection); } if (!isContinuousMining) { - setLogFilesForMining( - connection, - startScn, - archiveLogRetention, - archiveLogOnlyMode, - archiveDestinationName); + currentLogFiles = + setLogFilesForMining( + connection, + startScn, + archiveLogRetention, + archiveLogOnlyMode, + archiveDestinationName, + logFileQueryMaxRetries, + initialDelay, + maxDelay); + currentRedoLogSequences = getCurrentLogFileSequences(currentLogFiles); } } else { if (!isContinuousMining) { if (OracleConnectorConfig.LogMiningStrategy.CATALOG_IN_REDO.equals(strategy)) { buildDataDictionary(connection); } - setLogFilesForMining( - connection, - startScn, - archiveLogRetention, - archiveLogOnlyMode, - archiveDestinationName); + currentLogFiles = + setLogFilesForMining( + connection, + startScn, + archiveLogRetention, + archiveLogOnlyMode, + archiveDestinationName, + logFileQueryMaxRetries, + initialDelay, + maxDelay); + currentRedoLogSequences = getCurrentLogFileSequences(currentLogFiles); } } + + updateRedoLogMetrics(); + } + + /** + * Get the current log file sequences from the supplied list of log files. + * + * @param logFiles list of log files + * @return list of sequences for the logs that are marked "current" in the database. + */ + private List getCurrentLogFileSequences(List logFiles) { + if (logFiles == null || logFiles.isEmpty()) { + return Collections.emptyList(); + } + return logFiles.stream() + .filter(LogFile::isCurrent) + .map(LogFile::getSequence) + .collect(Collectors.toList()); + } + + /** + * Get the maximum archive log SCN. + * + * @param logFiles the current logs that are part of the mining session + * @return the maximum system change number from the archive logs + * @throws DebeziumException if no logs are provided or if the provided logs has no archive log + * types + */ + private Scn getMaxArchiveLogScn(List logFiles) { + if (logFiles == null || logFiles.isEmpty()) { + throw new DebeziumException( + "Cannot get maximum archive log SCN as no logs were available."); + } + + final List archiveLogs = + logFiles.stream() + .filter(log -> log.getType().equals(LogFile.Type.ARCHIVE)) + .collect(Collectors.toList()); + + if (archiveLogs.isEmpty()) { + throw new DebeziumException( + "Cannot get maximum archive log SCN as no archive logs are present."); + } + + Scn maxScn = archiveLogs.get(0).getNextScn(); + for (int i = 1; i < archiveLogs.size(); ++i) { + Scn nextScn = archiveLogs.get(i).getNextScn(); + if (nextScn.compareTo(maxScn) > 0) { + maxScn = nextScn; + } + } + + LOGGER.debug("Maximum archive log SCN resolved as {}", maxScn); + return maxScn; + } + + /** + * Requests Oracle to build the data dictionary. + * + *
<p>
During the build step, Oracle will perform an additional series of redo log switches. + * Additionally, this call may introduce a delay in delivering incremental changes since the + * dictionary will need to have statistics gathered, analyzed, and prepared by LogMiner before + * any redo entries can be mined. + * + *
<p>
This should only be used in conjunction with the mining strategy {@link + * io.debezium.connector.oracle.OracleConnectorConfig.LogMiningStrategy#CATALOG_IN_REDO}. + * + * @param connection database connection + * @throws SQLException if a database exception occurred + */ + private void buildDataDictionary(OracleConnection connection) throws SQLException { + LOGGER.trace("Building data dictionary"); + connection.executeWithoutCommitting( + "BEGIN DBMS_LOGMNR_D.BUILD (options => DBMS_LOGMNR_D.STORE_IN_REDO_LOGS); END;"); } /** @@ -401,17 +530,6 @@ private boolean hasLogSwitchOccurred() throws SQLException { currentRedoLogSequences = newSequences; - final Map logStatuses = - jdbcConnection.queryAndMap( - SqlUtils.redoLogStatusQuery(), - rs -> { - Map results = new LinkedHashMap<>(); - while (rs.next()) { - results.put(rs.getString(1), rs.getString(2)); - } - return results; - }); - final int logSwitchCount = jdbcConnection.queryAndMap( SqlUtils.switchHistoryQuery(archiveDestinationName), @@ -421,19 +539,56 @@ private boolean hasLogSwitchOccurred() throws SQLException { } return 0; }); - - final Set fileNames = getCurrentRedoLogFiles(jdbcConnection); - - streamingMetrics.setRedoLogStatus(logStatuses); streamingMetrics.setSwitchCount(logSwitchCount); - streamingMetrics.setCurrentLogFileName(fileNames); - return true; } return false; } + /** + * Updates the redo log names and statues in the streaming metrics. + * + * @throws SQLException if a database exception occurred + */ + private void updateRedoLogMetrics() throws SQLException { + final Map logStatuses = + jdbcConnection.queryAndMap( + SqlUtils.redoLogStatusQuery(), + rs -> { + Map results = new LinkedHashMap<>(); + while (rs.next()) { + results.put(rs.getString(1), rs.getString(2)); + } + return results; + }); + + final Set fileNames = getCurrentRedoLogFiles(jdbcConnection); + streamingMetrics.setCurrentLogFileName(fileNames); + streamingMetrics.setRedoLogStatus(logStatuses); + } + + /** + * Get a list of all the CURRENT redo log file names. For Oracle RAC clusters, multiple + * filenames will be returned, one for each node that participates in the cluster. + * + * @param connection database connection, should not be {@code null} + * @return unique set of all current redo log file names, with full paths, never {@code null} + * @throws SQLException if a database exception occurred + */ + private Set getCurrentRedoLogFiles(OracleConnection connection) throws SQLException { + final Set fileNames = new HashSet<>(); + connection.query( + SqlUtils.currentRedoNameQuery(), + rs -> { + while (rs.next()) { + fileNames.add(rs.getString(1)); + } + }); + LOGGER.trace("Current redo log filenames: {}", fileNames); + return fileNames; + } + /** * Get the current redo log sequence(s). * @@ -461,6 +616,410 @@ private void pauseBetweenMiningSessions() throws InterruptedException { Metronome.sleeper(period, clock).pause(); } + /** + * Sets the NLS parameters for the mining session. 
+ * + * @param connection database connection, should not be {@code null} + * @throws SQLException if a database exception occurred + */ + private void setNlsSessionParameters(OracleConnection connection) throws SQLException { + final String nlsSessionParameters = + "ALTER SESSION SET " + + " NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'" + + " NLS_TIMESTAMP_FORMAT = 'YYYY-MM-DD HH24:MI:SS.FF'" + + " NLS_TIMESTAMP_TZ_FORMAT = 'YYYY-MM-DD HH24:MI:SS.FF TZH:TZM'" + + " NLS_NUMERIC_CHARACTERS = '.,'"; + + connection.executeWithoutCommitting(nlsSessionParameters); + // This is necessary so that TIMESTAMP WITH LOCAL TIME ZONE is returned in UTC + connection.executeWithoutCommitting("ALTER SESSION SET TIME_ZONE = '00:00'"); + } + + /** + * Get the database system time in the database system's time zone. + * + * @param connection database connection, should not be {@code null} + * @return the database system time + * @throws SQLException if a database exception occurred + */ + private OffsetDateTime getDatabaseSystemTime(OracleConnection connection) throws SQLException { + return connection.singleOptionalValue( + "SELECT SYSTIMESTAMP FROM DUAL", rs -> rs.getObject(1, OffsetDateTime.class)); + } + + /** + * Starts a new Oracle LogMiner session. + * + *
<p>
When this is called, LogMiner prepares all the necessary state for an upcoming LogMiner + * view query. If the mining statement defines using DDL tracking, the data dictionary will be + * mined as a part of this call to prepare DDL tracking state for the upcoming LogMiner view + * query. + * + * @param connection database connection, should not be {@code null} + * @param startScn mining session's starting system change number (exclusive), should not be + * {@code null} + * @param endScn mining session's ending system change number (inclusive), can be {@code null} + * @param attempts the number of mining start attempts + * @return true if the session was started successfully, false if it should be retried + * @throws SQLException if mining session failed to start + */ + public boolean startMiningSession( + OracleConnection connection, Scn startScn, Scn endScn, int attempts) + throws SQLException { + LOGGER.trace( + "Starting mining session startScn={}, endScn={}, strategy={}, continuous={}", + startScn, + endScn, + strategy, + isContinuousMining); + try { + Instant start = Instant.now(); + // NOTE: we treat startSCN as the _exclusive_ lower bound for mining, + // whereas START_LOGMNR takes an _inclusive_ lower bound, hence the increment. + connection.executeWithoutCommitting( + SqlUtils.startLogMinerStatement( + startScn.add(Scn.ONE), endScn, strategy, isContinuousMining)); + streamingMetrics.addCurrentMiningSessionStart(Duration.between(start, Instant.now())); + return true; + } catch (SQLException e) { + if (e.getErrorCode() == 1291 || e.getMessage().startsWith("ORA-01291")) { + if (attempts <= MINING_START_RETRIES) { + LOGGER.warn("Failed to start Oracle LogMiner session, retrying..."); + return false; + } + LOGGER.error( + "Failed to start Oracle LogMiner after '{}' attempts.", + MINING_START_RETRIES, + e); + } + LOGGER.error("Got exception when starting mining session.", e); + // Capture the database state before throwing the exception up + LogMinerDatabaseStateWriter.write(connection); + throw e; + } + } + + /** + * End the current Oracle LogMiner session, if one is in progress. If the current session does + * not have an active mining session, a log message is recorded and the method is a no-op. + * + * @param connection database connection, should not be {@code null} + * @param offsetContext connector offset context, should not be {@code null} + * @throws SQLException if the current mining session cannot be ended gracefully + */ + public void endMiningSession(OracleConnection connection, OracleOffsetContext offsetContext) + throws SQLException { + try { + LOGGER.trace( + "Ending log mining startScn={}, endScn={}, offsetContext.getScn={}, strategy={}, continuous={}", + startScn, + endScn, + offsetContext.getScn(), + strategy, + isContinuousMining); + connection.executeWithoutCommitting("BEGIN SYS.DBMS_LOGMNR.END_LOGMNR(); END;"); + } catch (SQLException e) { + if (e.getMessage().toUpperCase().contains("ORA-01307")) { + LOGGER.info("LogMiner mining session is already closed."); + return; + } + // LogMiner failed to terminate properly, a restart of the connector will be required. + throw e; + } + } + + /** + * Calculates the mining session's end system change number. + * + *
<p>
This calculation is based upon a sliding window algorithm to where if the connector is + * falling behind, the mining session's end point will be calculated based on the batch size and + * either be increased up to the maximum batch size or reduced to as low as the minimum batch + * size. + * + *
<p>
Additionally, this method calculates and maintains a sliding algorithm for the sleep time + * between the mining sessions, increasing the pause up to the maximum sleep time if the + * connector is not behind or is mining too quick and reducing the pause down to the mimum sleep + * time if the connector has fallen behind and needs to catch-up faster. + * + * @param connection database connection, should not be {@code null} + * @param startScn upcoming mining session's starting change number, should not be {@code null} + * @param prevEndScn last mining session's ending system change number, can be {@code null} + * @return the ending system change number to be used for the upcoming mining session, never + * {@code null} + * @throws SQLException if the current max system change number cannot be obtained from the + * database + */ + private Scn calculateEndScn(OracleConnection connection, Scn startScn, Scn prevEndScn) + throws SQLException { + Scn currentScn = + archiveLogOnlyMode + ? getMaxArchiveLogScn(currentLogFiles) + : connection.getCurrentScn(); + streamingMetrics.setCurrentScn(currentScn); + + // Add the current batch size to the starting system change number + final Scn currentBatchSizeScn = Scn.valueOf(streamingMetrics.getBatchSize()); + Scn topScnToMine = startScn.add(currentBatchSizeScn); + + // Control adjusting batch size + boolean topMiningScnInFarFuture = false; + if (topScnToMine.subtract(currentScn).compareTo(currentBatchSizeScn) > 0) { + streamingMetrics.changeBatchSize(false, connectorConfig.isLobEnabled()); + topMiningScnInFarFuture = true; + } + if (currentScn.subtract(topScnToMine).compareTo(currentBatchSizeScn) > 0) { + streamingMetrics.changeBatchSize(true, connectorConfig.isLobEnabled()); + } + + // Control sleep time to reduce database impact + if (currentScn.compareTo(topScnToMine) < 0) { + if (!topMiningScnInFarFuture) { + streamingMetrics.changeSleepingTime(true); + } + LOGGER.debug("Using current SCN {} as end SCN.", currentScn); + return currentScn; + } else { + if (prevEndScn != null && topScnToMine.compareTo(prevEndScn) <= 0) { + LOGGER.debug( + "Max batch size too small, using current SCN {} as end SCN.", currentScn); + return currentScn; + } + streamingMetrics.changeSleepingTime(false); + if (topScnToMine.compareTo(startScn) < 0) { + LOGGER.debug( + "Top SCN calculation resulted in end before start SCN, using current SCN {} as end SCN.", + currentScn); + return currentScn; + } + + if (prevEndScn != null) { + final Scn deltaScn = currentScn.subtract(prevEndScn); + if (deltaScn.compareTo( + Scn.valueOf( + connectorConfig.getLogMiningScnGapDetectionGapSizeMin())) + > 0) { + Optional prevEndScnTimestamp = + connection.getScnToTimestamp(prevEndScn); + if (prevEndScnTimestamp.isPresent()) { + Optional currentScnTimestamp = + connection.getScnToTimestamp(currentScn); + if (currentScnTimestamp.isPresent()) { + long timeDeltaMs = + ChronoUnit.MILLIS.between( + prevEndScnTimestamp.get(), currentScnTimestamp.get()); + if (timeDeltaMs + < connectorConfig + .getLogMiningScnGapDetectionTimeIntervalMaxMs()) { + LOGGER.warn( + "Detected possible SCN gap, using current SCN, startSCN {}, prevEndScn {} timestamp {}, current SCN {} timestamp {}.", + startScn, + prevEndScn, + prevEndScnTimestamp.get(), + currentScn, + currentScnTimestamp.get()); + return currentScn; + } + } + } + } + } + + LOGGER.debug( + "Using Top SCN calculation {} as end SCN. 
currentScn {}, startScn {}", + topScnToMine, + currentScn, + startScn); + return topScnToMine; + } + } + + /** + * Checks and validates the database's supplemental logging configuration as well as the lengths + * of the table and column names that are part of the database schema. + * + * @param connection database connection, should not be {@code null} + * @param pdbName pluggable database name, can be {@code null} when not using pluggable + * databases + * @param schema connector's database schema, should not be {@code null} + * @throws SQLException if a database exception occurred + */ + private void checkDatabaseAndTableState( + OracleConnection connection, String pdbName, OracleDatabaseSchema schema) + throws SQLException { + final Instant start = Instant.now(); + LOGGER.trace( + "Checking database and table state, this may take time depending on the size of your schema."); + try { + if (pdbName != null) { + connection.setSessionToPdb(pdbName); + } + + // Check if ALL supplemental logging is enabled at the database + if (!isDatabaseAllSupplementalLoggingEnabled(connection)) { + // Check if MIN supplemental logging is enabled at the database + if (!isDatabaseMinSupplementalLoggingEnabled(connection)) { + throw new DebeziumException( + "Supplemental logging not properly configured. " + + "Use: ALTER DATABASE ADD SUPPLEMENTAL LOG DATA"); + } + + // Check if ALL COLUMNS supplemental logging is enabled for each captured table + for (TableId tableId : schema.tableIds()) { + if (!connection.isTableExists(tableId)) { + LOGGER.warn( + "Database table '{}' no longer exists, supplemental log check skipped", + tableId); + } else if (!isTableAllColumnsSupplementalLoggingEnabled(connection, tableId)) { + LOGGER.warn( + "Database table '{}' not configured with supplemental logging \"(ALL) COLUMNS\"; " + + "only explicitly changed columns will be captured. " + + "Use: ALTER TABLE {}.{} ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS", + tableId, + tableId.schema(), + tableId.table()); + } + final Table table = schema.tableFor(tableId); + if (table == null) { + // This should never happen; however in the event something would cause it + // we can + // at least get the table identifier thrown in the error to debug from + // rather + // than an erroneous NPE + throw new DebeziumException( + "Unable to find table in relational model: " + tableId); + } + checkTableColumnNameLengths(table); + } + } else { + // ALL supplemental logging is enabled, now check table/column lengths + for (TableId tableId : schema.tableIds()) { + final Table table = schema.tableFor(tableId); + if (table == null) { + // This should never happen; however in the event something would cause it + // we can + // at least get the table identifier thrown in the error to debug from + // rather + // than an erroneous NPE + throw new DebeziumException( + "Unable to find table in relational model: " + tableId); + } + checkTableColumnNameLengths(table); + } + } + } finally { + if (pdbName != null) { + connection.resetSessionToCdb(); + } + } + LOGGER.trace( + "Database and table state check finished after {} ms", + Duration.between(start, Instant.now()).toMillis()); + } + + /** + * Examines the table and column names and logs a warning if any name exceeds {@link + * #MAXIMUM_NAME_LENGTH}. 
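/*
 * Illustrative aside, not part of this patch: the core of the sliding-window end-SCN
 * calculation in calculateEndScn(...) above, reduced to plain long arithmetic. The real
 * method additionally adapts the batch size and sleep time and performs SCN-gap detection;
 * the names below are invented for illustration only.
 */
final class EndScnSketch {
    static long calculate(long startScn, long currentDatabaseScn, long batchSize) {
        long topScnToMine = startScn + batchSize;
        // Never mine past the database's current SCN; otherwise honor the batch-size window.
        return Math.min(topScnToMine, currentDatabaseScn);
    }
}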
+ * + * @param table the table, should not be {@code null} + */ + private void checkTableColumnNameLengths(Table table) { + if (table.id().table().length() > MAXIMUM_NAME_LENGTH) { + LOGGER.warn( + "Table '{}' won't be captured by Oracle LogMiner because its name exceeds {} characters.", + table.id().table(), + MAXIMUM_NAME_LENGTH); + } + for (Column column : table.columns()) { + if (column.name().length() > MAXIMUM_NAME_LENGTH) { + LOGGER.warn( + "Table '{}' won't be captured by Oracle LogMiner because column '{}' exceeds {} characters.", + table.id().table(), + column.name(), + MAXIMUM_NAME_LENGTH); + } + } + } + + /** + * Returns whether the database is configured with ALL supplemental logging. + * + * @param connection database connection, must not be {@code null} + * @return true if all supplemental logging is enabled, false otherwise + * @throws SQLException if a database exception occurred + */ + private boolean isDatabaseAllSupplementalLoggingEnabled(OracleConnection connection) + throws SQLException { + return connection.queryAndMap( + SqlUtils.databaseSupplementalLoggingAllCheckQuery(), + rs -> { + while (rs.next()) { + if ("YES".equalsIgnoreCase(rs.getString(2))) { + return true; + } + } + return false; + }); + } + + /** + * Returns whether the database is configured with MIN supplemental logging. + * + * @param connection database connection, must not be {@code null} + * @return true if min supplemental logging is enabled, false otherwise + * @throws SQLException if a database exception occurred + */ + private boolean isDatabaseMinSupplementalLoggingEnabled(OracleConnection connection) + throws SQLException { + return connection.queryAndMap( + SqlUtils.databaseSupplementalLoggingMinCheckQuery(), + rs -> { + while (rs.next()) { + if ("YES".equalsIgnoreCase(rs.getString(2))) { + return true; + } + } + return false; + }); + } + + /** + * Return whether the table is configured with ALL COLUMN supplemental logging. + * + * @param connection database connection, must not be {@code null} + * @param tableId table identifier, must not be {@code null} + * @return true if all column supplemental logging is enabled, false otherwise + * @throws SQLException if a database exception occurred + */ + private boolean isTableAllColumnsSupplementalLoggingEnabled( + OracleConnection connection, TableId tableId) throws SQLException { + // A table can be defined with multiple logging groups, hence why this check needs to + // iterate + // multiple returned rows to see whether ALL_COLUMN_LOGGING is part of the set. + return connection.queryAndMap( + SqlUtils.tableSupplementalLoggingCheckQuery(tableId), + rs -> { + while (rs.next()) { + if (ALL_COLUMN_LOGGING.equals(rs.getString(2))) { + return true; + } + } + return false; + }); + } + + /** + * Resolves the Oracle LGWR buffer flushing strategy. + * + * @return the strategy to be used to flush Oracle's LGWR process, never {@code null}. + */ + private LogWriterFlushStrategy resolveFlushStrategy() { + if (connectorConfig.isRacSystem()) { + return new RacCommitLogWriterFlushStrategy( + connectorConfig, jdbcConfiguration, streamingMetrics); + } + return new CommitLogWriterFlushStrategy(jdbcConnection); + } + /** * Waits for the starting system change number to exist in the archive logs before returning. 
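/*
 * Illustrative aside, not part of this patch: a sketch of how the database-level supplemental
 * logging settings verified by checkDatabaseAndTableState(...) above could be inspected
 * manually over JDBC. It assumes the standard Oracle V$DATABASE columns
 * SUPPLEMENTAL_LOG_DATA_MIN and SUPPLEMENTAL_LOG_DATA_ALL; the connector itself issues its own
 * queries via SqlUtils.
 */
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

final class SupplementalLoggingProbe {
    static void print(Connection connection) throws SQLException {
        try (Statement statement = connection.createStatement();
                ResultSet rs =
                        statement.executeQuery(
                                "SELECT SUPPLEMENTAL_LOG_DATA_MIN, SUPPLEMENTAL_LOG_DATA_ALL FROM V$DATABASE")) {
            if (rs.next()) {
                // MIN must not be 'NO'; ALL avoids the per-table (ALL) COLUMNS requirement.
                System.out.println("SUPPLEMENTAL_LOG_DATA_MIN = " + rs.getString(1));
                System.out.println("SUPPLEMENTAL_LOG_DATA_ALL = " + rs.getString(2));
            }
        }
    }
}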
* diff --git a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/TransactionalBuffer.java b/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/TransactionalBuffer.java deleted file mode 100644 index 996b03a490f..00000000000 --- a/flink-connector-oracle-cdc/src/main/java/io/debezium/connector/oracle/logminer/TransactionalBuffer.java +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright 2022 Ververica Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.debezium.connector.oracle.logminer; - -import io.debezium.DebeziumException; -import io.debezium.annotation.NotThreadSafe; -import io.debezium.connector.oracle.BlobChunkList; -import io.debezium.connector.oracle.OracleConnectorConfig; -import io.debezium.connector.oracle.OracleDatabaseSchema; -import io.debezium.connector.oracle.OracleOffsetContext; -import io.debezium.connector.oracle.OracleStreamingChangeEventSourceMetrics; -import io.debezium.connector.oracle.Scn; -import io.debezium.connector.oracle.logminer.parser.SelectLobParser; -import io.debezium.connector.oracle.logminer.valueholder.LogMinerDmlEntry; -import io.debezium.pipeline.ErrorHandler; -import io.debezium.pipeline.EventDispatcher; -import io.debezium.pipeline.source.spi.ChangeEventSource; -import io.debezium.relational.Table; -import io.debezium.relational.TableId; -import io.debezium.util.Clock; -import org.apache.kafka.connect.errors.DataException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.sql.Timestamp; -import java.time.Duration; -import java.time.Instant; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.function.Supplier; - -/** - * Copied from https://github.com/debezium/debezium project to fix - * https://issues.redhat.com/browse/DBZ-4936 for 1.6.4.Final version. - * - *
<p>
Line 370 : Use `offsetContext.getCommitScn().compareTo(scn) >= 0` instead of - * `offsetContext.getCommitScn().compareTo(scn) > 0`. We should remove this class since we bumped - * higher debezium version after 1.9.1.Final where the issue has been fixed. - */ -@NotThreadSafe -public final class TransactionalBuffer implements AutoCloseable { - - private static final Logger LOGGER = LoggerFactory.getLogger(TransactionalBuffer.class); - - private final OracleConnectorConfig connectorConfig; - private final Map transactions; - private final OracleDatabaseSchema schema; - private final Clock clock; - private final ErrorHandler errorHandler; - private final Set abandonedTransactionIds; - private final Set rolledBackTransactionIds; - private final Set recentlyCommittedTransactionIds; - private final Set recentlyEmittedDdls; - private final OracleStreamingChangeEventSourceMetrics streamingMetrics; - - private Scn lastCommittedScn; - private Scn maxCommittedScn; - - /** - * Constructor to create a new instance. - * - * @param connectorConfig connector configuration, should not be {@code null} - * @param schema database schema - * @param clock system clock - * @param errorHandler the connector error handler - * @param streamingMetrics the streaming metrics - */ - TransactionalBuffer( - OracleConnectorConfig connectorConfig, - OracleDatabaseSchema schema, - Clock clock, - ErrorHandler errorHandler, - OracleStreamingChangeEventSourceMetrics streamingMetrics) { - this.transactions = new HashMap<>(); - this.connectorConfig = connectorConfig; - this.schema = schema; - this.clock = clock; - this.errorHandler = errorHandler; - this.lastCommittedScn = Scn.NULL; - this.maxCommittedScn = Scn.NULL; - this.abandonedTransactionIds = new HashSet<>(); - this.rolledBackTransactionIds = new HashSet<>(); - this.recentlyCommittedTransactionIds = new HashSet<>(); - this.recentlyEmittedDdls = new HashSet<>(); - this.streamingMetrics = streamingMetrics; - } - - /** @return rolled back transactions */ - Set getRolledBackTransactionIds() { - return new HashSet<>(rolledBackTransactionIds); - } - - /** - * Registers a DDL operation with the buffer. - * - * @param scn the system change number - */ - void registerDdlOperation(Scn scn) { - if (connectorConfig.isLobEnabled()) { - recentlyEmittedDdls.add(scn); - } - } - - /** - * Returns whether the ddl operation has been registered. - * - * @param scn the system change number - * @return true if the ddl operation has been seen and processed, false otherwise. - */ - boolean isDdlOperationRegistered(Scn scn) { - return recentlyEmittedDdls.contains(scn); - } - - /** - * Register a DML operation with the transaction buffer. - * - * @param operation operation type - * @param transactionId unique transaction identifier - * @param scn system change number - * @param tableId table identifier - * @param entrySupplier parser entry provider - * @param changeTime time the DML operation occurred - * @param rowId unique row identifier - * @param rsId rollback sequence identifier - */ - void registerDmlOperation( - int operation, - String transactionId, - Scn scn, - TableId tableId, - Supplier entrySupplier, - Instant changeTime, - String rowId, - Object rsId) { - if (registerEvent( - transactionId, - scn, - changeTime, - () -> new DmlEvent(operation, entrySupplier.get(), scn, tableId, rowId, rsId))) { - streamingMetrics.incrementRegisteredDmlCount(); - } - } - - /** - * Register a {@code SEL_LOB_LOCATOR} operation with the transaction buffer. 
- * - * @param operation operation type - * @param transactionId unique transaction identifier - * @param scn system change number - * @param tableId table identifier - * @param changeTime time the operation occurred - * @param rowId unique row identifier - * @param rsId rollback sequence identifier - * @param segOwner table owner - * @param tableName table name - * @param redoSql the redo sql statement - * @param table the relational table - * @param selectLobParser the select LOB parser - */ - void registerSelectLobOperation( - int operation, - String transactionId, - Scn scn, - TableId tableId, - Instant changeTime, - String rowId, - Object rsId, - String segOwner, - String tableName, - String redoSql, - Table table, - SelectLobParser selectLobParser) { - registerEvent( - transactionId, - scn, - changeTime, - () -> { - final LogMinerDmlEntry entry = selectLobParser.parse(redoSql, table); - entry.setObjectOwner(segOwner); - entry.setObjectName(tableName); - return new SelectLobLocatorEvent( - operation, - entry, - selectLobParser.getColumnName(), - selectLobParser.isBinary(), - scn, - tableId, - rowId, - rsId); - }); - } - - /** - * Register a {@code LOB_WRITE} operation with the transaction buffer. - * - * @param operation operation type - * @param transactionId unique transaction identifier - * @param scn system change number - * @param tableId table identifier - * @param data data written by the LOB operation - * @param changeTime time the operation occurred - * @param rowId unique row identifier - * @param rsId rollback sequence identifier - */ - void registerLobWriteOperation( - int operation, - String transactionId, - Scn scn, - TableId tableId, - String data, - Instant changeTime, - String rowId, - Object rsId) { - if (data != null) { - registerEvent( - transactionId, - scn, - changeTime, - () -> - new LobWriteEvent( - operation, parseLobWriteSql(data), scn, tableId, rowId, rsId)); - } - } - - /** - * Register a {@code LOB_ERASE} operation with the transction buffer. - * - * @param operation operation type - * @param transactionId unique transaction identifier - * @param scn system change number - * @param tableId table identifier - * @param changeTime time the operation occurred - * @param rowId unique row identifier - * @param rsId rollback sequence identifier - */ - void registerLobEraseOperation( - int operation, - String transactionId, - Scn scn, - TableId tableId, - Instant changeTime, - String rowId, - Object rsId) { - registerEvent( - transactionId, - scn, - changeTime, - () -> new LobEraseEvent(operation, scn, tableId, rowId, rsId)); - } - - /** - * Undo a staged DML operation in the transaction buffer. - * - * @param transactionId unique transaction identifier - * @param undoRowId unique row identifier to be undone - * @param tableId table identifier - */ - void undoDmlOperation(String transactionId, String undoRowId, TableId tableId) { - Transaction transaction = transactions.get(transactionId); - if (transaction == null) { - LOGGER.warn( - "Cannot undo changes to {} with row id {} as transaction {} not found.", - tableId, - undoRowId, - transactionId); - return; - } - - transaction.events.removeIf( - o -> { - if (o.getRowId().equals(undoRowId)) { - LOGGER.trace( - "Undoing change to {} with row id {} in transaction {}", - tableId, - undoRowId, - transactionId); - return true; - } - return false; - }); - } - - /** - * Register a new transaction with the transaction buffer. 
- * - * @param transactionId unique transaction identifier - * @param scn starting SCN of the transaction - */ - void registerTransaction(String transactionId, Scn scn) { - Transaction transaction = transactions.get(transactionId); - if (transaction == null && !isRecentlyCommitted(transactionId)) { - transactions.put(transactionId, new Transaction(transactionId, scn)); - streamingMetrics.setActiveTransactions(transactions.size()); - } else if (transaction != null && !isRecentlyCommitted(transactionId)) { - LOGGER.trace( - "Transaction {} is not yet committed and START event detected, reset eventIds.", - transactionId); - // Since the transaction hasn't been committed and the START transaction was re-mined, - // reset the event id counter for the transaction so that any events pulled from the - // event stream are added at the right index offsets. - transaction.eventIds = 0; - } - } - - /** - * Commits a transaction by looking up the transaction in the buffer and if exists, all - * registered callbacks will be executed in chronological order, emitting events for each - * followed by a transaction commit event. - * - * @param transactionId transaction identifier - * @param scn SCN of the commit. - * @param offsetContext Oracle offset - * @param timestamp commit timestamp - * @param context context to check that source is running - * @param debugMessage message - * @param dispatcher event dispatcher - * @return true if committed transaction is in the buffer, was not processed yet and processed - * now - */ - boolean commit( - String transactionId, - Scn scn, - OracleOffsetContext offsetContext, - Timestamp timestamp, - ChangeEventSource.ChangeEventSourceContext context, - String debugMessage, - EventDispatcher dispatcher) { - - Instant start = Instant.now(); - Transaction transaction = transactions.remove(transactionId); - if (transaction == null) { - return false; - } - - Scn smallestScn = calculateSmallestScn(); - - abandonedTransactionIds.remove(transactionId); - - if (isRecentlyCommitted(transactionId)) { - return false; - } - - // On the restarting connector, we start from SCN in the offset. There is possibility to - // commit a transaction(s) which were already committed. - // Currently we cannot use ">=", because we may lose normal commit which may happen at the - // same time. TODO use audit table to prevent duplications - if ((offsetContext.getCommitScn() != null - && offsetContext.getCommitScn().compareTo(scn) >= 0) - || lastCommittedScn.compareTo(scn) > 0) { - LOGGER.debug( - "Transaction {} already processed, ignored. 
Committed SCN in offset is {}, commit SCN of the transaction is {}, last committed SCN is {}", - transactionId, - offsetContext.getCommitScn(), - scn, - lastCommittedScn); - streamingMetrics.setActiveTransactions(transactions.size()); - return false; - } - - reconcileTransaction(transaction); - - LOGGER.trace("COMMIT, {}, smallest SCN: {}", debugMessage, smallestScn); - try { - int counter = transaction.events.size(); - for (LogMinerEvent event : transaction.events) { - if (!context.isRunning()) { - return false; - } - - // Update SCN in offset context only if processed SCN less than SCN among other - // transactions - if (smallestScn == null || scn.compareTo(smallestScn) < 0) { - offsetContext.setScn(event.getScn()); - streamingMetrics.setOldestScn(event.getScn()); - } - - offsetContext.setTransactionId(transaction.transactionId); - offsetContext.setSourceTime(timestamp.toInstant()); - offsetContext.setTableId(event.getTableId()); - if (--counter == 0) { - offsetContext.setCommitScn(scn); - } - - LOGGER.trace("Processing event {}", event); - dispatcher.dispatchDataChangeEvent( - event.getTableId(), - new LogMinerChangeRecordEmitter( - offsetContext, - event.getOperation(), - event.getEntry().getOldValues(), - event.getEntry().getNewValues(), - schema.tableFor(event.getTableId()), - clock)); - } - - lastCommittedScn = Scn.valueOf(scn.longValue()); - if (!transaction.events.isEmpty()) { - dispatcher.dispatchTransactionCommittedEvent(offsetContext); - } else { - dispatcher.dispatchHeartbeatEvent(offsetContext); - } - - streamingMetrics.calculateLagMetrics(timestamp.toInstant()); - - if (lastCommittedScn.compareTo(maxCommittedScn) > 0) { - LOGGER.trace("Updated transaction buffer max commit SCN to '{}'", lastCommittedScn); - maxCommittedScn = lastCommittedScn; - } - - if (connectorConfig.isLobEnabled()) { - // cache recent transaction and commit scn for handling offset updates - recentlyCommittedTransactionIds.add( - new RecentlyCommittedTransaction(transaction, scn)); - } - } catch (InterruptedException e) { - LogMinerHelper.logError(streamingMetrics, "Commit interrupted", e); - Thread.currentThread().interrupt(); - } catch (Exception e) { - errorHandler.setProducerThrowable(e); - } finally { - streamingMetrics.incrementCommittedTransactions(); - streamingMetrics.setActiveTransactions(transactions.size()); - streamingMetrics.incrementCommittedDmlCount(transaction.events.size()); - streamingMetrics.setCommittedScn(scn); - streamingMetrics.setOffsetScn(offsetContext.getScn()); - streamingMetrics.setLastCommitDuration(Duration.between(start, Instant.now())); - } - - return true; - } - - /** - * Update the offset context based on the current state of the transaction buffer. 
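/*
 * Illustrative aside, not part of this patch: the duplicate-commit guard used by commit(...)
 * in this (now deleted) class, and the reason this copy existed (DBZ-4936). On restart the
 * connector may re-read a commit whose SCN equals the commit SCN stored in the offset; using
 * ">=" instead of ">" for that comparison skips such a commit. Plain long values are used and
 * the names below are invented for illustration only.
 */
final class CommitGuardSketch {
    static boolean alreadyProcessed(long offsetCommitScn, long lastCommittedScn, long txCommitScn) {
        // Mirrors: offsetContext.getCommitScn().compareTo(scn) >= 0 || lastCommittedScn.compareTo(scn) > 0
        return offsetCommitScn >= txCommitScn || lastCommittedScn > txCommitScn;
    }
}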
- * - * @param offsetContext offset context, should not be {@code null} - * @param dispatcher event dispatcher, should not be {@code null} - * @return offset context SCN, never {@code null} - * @throws InterruptedException thrown if dispatch of heartbeat event fails - */ - Scn updateOffsetContext(OracleOffsetContext offsetContext, EventDispatcher dispatcher) - throws InterruptedException { - if (transactions.isEmpty()) { - if (!maxCommittedScn.isNull()) { - LOGGER.trace( - "Transaction buffer is empty, updating offset SCN to '{}'", - maxCommittedScn); - offsetContext.setScn(maxCommittedScn); - dispatcher.dispatchHeartbeatEvent(offsetContext); - } else { - LOGGER.trace( - "No max committed SCN detected, offset SCN still '{}'", - offsetContext.getScn()); - } - } else { - Scn minStartScn = getMinimumScn(); - if (!minStartScn.isNull()) { - LOGGER.trace("Removing all commits up to SCN '{}'", minStartScn); - recentlyCommittedTransactionIds.removeIf( - t -> t.firstScn.compareTo(minStartScn) < 0); - LOGGER.trace("Removing all tracked DDL operations up to SCN '{}'", minStartScn); - recentlyEmittedDdls.removeIf(scn -> scn.compareTo(minStartScn) < 0); - offsetContext.setScn(minStartScn.subtract(Scn.valueOf(1))); - dispatcher.dispatchHeartbeatEvent(offsetContext); - } else { - LOGGER.trace("Minimum SCN in transaction buffer is still SCN '{}'", minStartScn); - } - } - return offsetContext.getScn(); - } - - Scn getMinimumScn() { - return transactions.values().stream() - .map(t -> t.firstScn) - .min(Scn::compareTo) - .orElse(Scn.NULL); - } - - /** - * Clears registered callbacks for given transaction identifier. - * - * @param transactionId transaction id - * @param debugMessage message - * @return true if the rollback is for a transaction in the buffer - */ - boolean rollback(String transactionId, String debugMessage) { - - Transaction transaction = transactions.get(transactionId); - if (transaction != null) { - LOGGER.debug("Transaction rolled back: {}", debugMessage); - - transactions.remove(transactionId); - abandonedTransactionIds.remove(transactionId); - rolledBackTransactionIds.add(transactionId); - - streamingMetrics.setActiveTransactions(transactions.size()); - streamingMetrics.incrementRolledBackTransactions(); - streamingMetrics.addRolledBackTransactionId(transactionId); - - return true; - } - - return false; - } - - /** - * If for some reason the connector got restarted, the offset will point to the beginning of the - * oldest captured transaction. If that transaction was lasted for a long time, let say > 4 - * hours, the offset might be not accessible after restart, Hence we have to address these cases - * manually. - * - *
<p>
In case of an abandonment, all DMLs/Commits/Rollbacks for this transaction will be ignored - * - * @param thresholdScn the smallest SVN of any transaction to keep in the buffer. All others - * will be removed. - * @param offsetContext the offset context - */ - void abandonLongTransactions(Scn thresholdScn, OracleOffsetContext offsetContext) { - LogMinerHelper.logWarn( - streamingMetrics, - "All transactions with first SCN <= {} will be abandoned, offset: {}", - thresholdScn, - offsetContext.getScn()); - Scn threshold = Scn.valueOf(thresholdScn.toString()); - Scn smallestScn = calculateSmallestScn(); - if (smallestScn == null) { - // no transactions in the buffer - return; - } - if (threshold.compareTo(smallestScn) < 0) { - threshold = smallestScn; - } - Iterator> iter = transactions.entrySet().iterator(); - while (iter.hasNext()) { - Map.Entry transaction = iter.next(); - if (transaction.getValue().firstScn.compareTo(threshold) <= 0) { - LogMinerHelper.logWarn( - streamingMetrics, - "Following long running transaction {} will be abandoned and ignored: {} ", - transaction.getKey(), - transaction.getValue().toString()); - abandonedTransactionIds.add(transaction.getKey()); - iter.remove(); - - streamingMetrics.addAbandonedTransactionId(transaction.getKey()); - streamingMetrics.setActiveTransactions(transactions.size()); - } - } - } - - boolean isTransactionRegistered(String txId) { - return transactions.get(txId) != null; - } - - private Scn calculateSmallestScn() { - Scn scn = - transactions.isEmpty() - ? null - : transactions.values().stream() - .map(transaction -> transaction.firstScn) - .min(Scn::compareTo) - .orElseThrow( - () -> new DataException("Cannot calculate smallest SCN")); - streamingMetrics.setOldestScn(scn == null ? Scn.valueOf(-1) : scn); - return scn; - } - - /** - * Returns {@code true} if buffer is empty, otherwise {@code false}. - * - * @return {@code true} if buffer is empty, otherwise {@code false} - */ - boolean isEmpty() { - return transactions.isEmpty(); - } - - @Override - public String toString() { - StringBuilder result = new StringBuilder(); - this.transactions.values().forEach(t -> result.append(t.toString())); - return result.toString(); - } - - @Override - public void close() { - transactions.clear(); - } - - /** - * Helper method to register a given {@link LogMinerEvent} implementation with the buffer. If - * the event is registered, the underlying metrics active transactions and lag will be - * re-calculated. 
- * - * @param transactionId transaction id that contained the given event - * @param scn system change number for the event - * @param changeTime the time the event occurred - * @param supplier supplier function to generate the event if validity checks pass - * @return true if the event was registered, false otherwise - */ - private boolean registerEvent( - String transactionId, Scn scn, Instant changeTime, Supplier supplier) { - if (abandonedTransactionIds.contains(transactionId)) { - LogMinerHelper.logWarn( - streamingMetrics, - "Event for abandoned transaction {}, ignored.", - transactionId); - return false; - } - if (rolledBackTransactionIds.contains(transactionId)) { - LogMinerHelper.logWarn( - streamingMetrics, - "Event for rolled back transaction {}, ignored.", - transactionId); - return false; - } - if (isRecentlyCommitted(transactionId)) { - LOGGER.trace( - "Event for transaction {} skipped, transaction already committed.", - transactionId); - return false; - } - - Transaction transaction = - transactions.computeIfAbsent( - transactionId, s -> new Transaction(transactionId, scn)); - streamingMetrics.setActiveTransactions(transactions.size()); - - int eventId = transaction.eventIds++; - if (transaction.events.size() > eventId) { - // only return true if new event is added, otherwise false - return false; - } else { - // Adding new event at eventId offset - LOGGER.trace( - "Transaction {}, adding event reference at index {}", transactionId, eventId); - transaction.events.add(supplier.get()); - streamingMetrics.calculateLagMetrics(changeTime); - return true; - } - } - - /** - * Returns whether the specified transaction has recently been committed. - * - * @param transactionId the transaction identifier - * @return true if the transaction has been recently committed (seen by the connector), - * otherwise false. - */ - private boolean isRecentlyCommitted(String transactionId) { - if (recentlyCommittedTransactionIds.isEmpty()) { - return false; - } - - for (RecentlyCommittedTransaction transaction : recentlyCommittedTransactionIds) { - if (transaction.transactionId.equals(transactionId)) { - return true; - } - } - return false; - } - - /** - * Parses a {@code LOB_WRITE} operation SQL fragment. - * - * @param sql sql statement - * @return the parsed statement - * @throws DebeziumException if an unexpected SQL fragment is provided that cannot be parsed - */ - private String parseLobWriteSql(String sql) { - if (sql == null) { - return null; - } - - int start = sql.indexOf(":= '"); - if (start != -1) { - // LOB_WRITE SQL is for a CLOB field - int end = sql.lastIndexOf("'"); - return sql.substring(start + 4, end); - } - - start = sql.indexOf(":= HEXTORAW"); - if (start != -1) { - // LOB_WRITE SQL is for a BLOB field - int end = sql.lastIndexOf("'") + 2; - return sql.substring(start + 3, end); - } - - throw new DebeziumException("Unable to parse unsupported LOB_WRITE SQL: " + sql); - } - - /** - * Reconcile the specified transaction by merging multiple events that should be emitted as a - * single logical event, such as changes made to LOB column types that involve multiple events. - * - * @param transaction transaction to be reconciled, never {@code null} - */ - private void reconcileTransaction(Transaction transaction) { - // Do not perform reconciliation if LOB support is not enabled. 
- if (!connectorConfig.isLobEnabled()) { - return; - } - - LOGGER.trace("Reconciling transaction {}", transaction.transactionId); - LogMinerEvent prevEvent = null; - - int prevEventSize = transaction.events.size(); - for (int i = 0; i < transaction.events.size(); ) { - - final LogMinerEvent event = transaction.events.get(i); - LOGGER.trace("Processing event {}", event); - - switch (event.getOperation()) { - case RowMapper.SELECT_LOB_LOCATOR: - if (shouldMergeSelectLobLocatorEvent( - transaction, i, (SelectLobLocatorEvent) event, prevEvent)) { - continue; - } - break; - case RowMapper.INSERT: - case RowMapper.UPDATE: - if (shouldMergeDmlEvent(transaction, i, (DmlEvent) event, prevEvent)) { - continue; - } - break; - } - - ++i; - prevEvent = event; - LOGGER.trace("Previous event is now {}", prevEvent); - } - - if (transaction.events.size() != prevEventSize) { - LOGGER.trace( - "Reconciled transaction {} from {} events to {}.", - transaction.transactionId, - prevEventSize, - transaction.events.size()); - } else { - LOGGER.trace("Transaction {} event queue was unmodified.", transaction.transactionId); - } - } - - /** - * Attempts to merge the provided SEL_LOB_LOCATOR event with the previous event in the - * transaction. - * - * @param transaction transaction being processed, never {@code null} - * @param index event index being processed - * @param event event being processed, never {@code null} - * @param prevEvent previous event in the transaction, can be {@code null} - * @return true if the event is merged, false if the event was not merged. - */ - private boolean shouldMergeSelectLobLocatorEvent( - Transaction transaction, - int index, - SelectLobLocatorEvent event, - LogMinerEvent prevEvent) { - LOGGER.trace("\tDetected SelectLobLocatorEvent for column '{}'", event.getColumnName()); - - final int columnIndex = - LogMinerHelper.getColumnIndexByName( - event.getColumnName(), schema.tableFor(event.getTableId())); - - // Read and combine all LOB_WRITE events that follow SEL_LOB_LOCATOR - Object lobData = null; - final List lobWrites = - readAndCombineLobWriteEvents(transaction, index, event.isBinaryData()); - if (!lobWrites.isEmpty()) { - if (event.isBinaryData()) { - // For BLOB we pass the list of string chunks as-is to the value converter - lobData = new BlobChunkList(lobWrites); - } else { - // For CLOB we go ahead and pre-process the List into a single string. - lobData = String.join("", lobWrites); - } - } - - // Read and consume all LOB_ERASE events that follow SEL_LOB_LOCATOR - final int lobEraseEvents = readAndConsumeLobEraseEvents(transaction, index); - if (lobEraseEvents > 0) { - LOGGER.warn( - "LOB_ERASE for table '{}' column '{}' is not supported, use DML operations to manipulate LOB columns only.", - event.getTableId(), - event.getColumnName()); - if (lobWrites.isEmpty()) { - // There are no write and only erase events, discard entire SEL_LOB_LOCATOR - // To simulate this, we treat this as a "merge" op so caller doesn't modify previous - // event - transaction.events.remove(index); - return true; - } - } else if (lobEraseEvents == 0 && lobWrites.isEmpty()) { - // There were no LOB operations present, discard entire SEL_LOB_LOCATOR - // To simulate this, we treat this as a "merge" op so caller doesn't modify previous - // event - transaction.events.remove(index); - return true; - } - - // SelectLobLocatorEvent can be treated as a parent DML operation where an update occurs on - // any - // LOB-based column. 
In this case, the event will be treated as an UPDATE event when - // emitted. - - if (prevEvent == null) { - // There is no prior event, add column to this SelectLobLocatorEvent and don't merge. - LOGGER.trace("\tAdding column '{}' to current event", event.getColumnName()); - event.getEntry().getNewValues()[columnIndex] = lobData; - return false; - } - - if (RowMapper.INSERT == prevEvent.getOperation()) { - // Previous event is an INSERT operation. - // Only merge the SEL_LOB_LOCATOR event if the previous INSERT is for the same table/row - // and if the INSERT's column value is EMPTY_CLOB() or EMPTY_BLOB() - if (isForSameTableOrScn(event, prevEvent)) { - LOGGER.trace("\tMerging SEL_LOB_LOCATOR with previous INSERT event"); - Object prevValue = prevEvent.getEntry().getNewValues()[columnIndex]; - if (!"EMPTY_CLOB()".equals(prevValue) && !"EMPTY_BLOB()".equals(prevValue)) { - throw new DebeziumException( - "Expected to find column '" - + event.getColumnName() - + "' in table '" - + prevEvent.getTableId() - + "' to be initialized as an empty LOB value.'"); - } - - prevEvent.getEntry().getNewValues()[columnIndex] = lobData; - - // Remove the SEL_LOB_LOCATOR event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } else if (RowMapper.UPDATE == prevEvent.getOperation()) { - // Previous event is an UPDATE operation. - // Only merge the SEL_LOB_LOCATOR event if the previous UPDATE is for the same table/row - if (isForSameTableOrScn(event, prevEvent) && isSameTableRow(event, prevEvent)) { - LOGGER.trace( - "\tUpdating SEL_LOB_LOCATOR column '{}' to previous UPDATE event", - event.getColumnName()); - prevEvent.getEntry().getNewValues()[columnIndex] = lobData; - - // Remove the SEL_LOB_LOCATOR event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } else if (RowMapper.SELECT_LOB_LOCATOR == prevEvent.getOperation()) { - // Previous event is a SEL_LOB_LOCATOR operation. - // Only merge the two SEL_LOB_LOCATOR events if they're for the same table/row - if (isForSameTableOrScn(event, prevEvent) && isSameTableRow(event, prevEvent)) { - LOGGER.trace( - "\tAdding column '{}' to previous SEL_LOB_LOCATOR event", - event.getColumnName()); - prevEvent.getEntry().getNewValues()[columnIndex] = lobData; - - // Remove the SEL_LOB_LOCATOR event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } else { - throw new DebeziumException( - "Unexpected previous event operation: " + prevEvent.getOperation()); - } - - LOGGER.trace("\tSEL_LOB_LOCATOR event is for different row, merge skipped."); - LOGGER.trace("\tAdding column '{}' to current event", event.getColumnName()); - event.getEntry().getNewValues()[columnIndex] = lobData; - return false; - } - - /** - * Attempts to merge the provided DML event with the previous event in the transaction. - * - * @param transaction transaction being processed, never {@code null} - * @param index event index being processed - * @param event event being processed, never {@code null} - * @param prevEvent previous event in the transaction, can be {@code null} - * @return true if the event is merged, false if the event was not merged - */ - private boolean shouldMergeDmlEvent( - Transaction transaction, int index, DmlEvent event, LogMinerEvent prevEvent) { - LOGGER.trace("\tDetected DmlEvent {}", event.getOperation()); - - if (prevEvent == null) { - // There is no prior event, therefore there is no reason to perform any merge. 
- return false; - } - - if (RowMapper.INSERT == prevEvent.getOperation()) { - // Previous event is an INSERT operation. - // The only valid combination here would be if the current event is an UPDATE since an - // INSERT cannot - // be merged with a prior INSERT with how LogMiner materializes the rows. - if (RowMapper.UPDATE == event.getOperation()) { - if (isForSameTableOrScn(event, prevEvent) && isSameTableRow(event, prevEvent)) { - LOGGER.trace("\tMerging UPDATE event with previous INSERT event"); - mergeNewColumns(event, prevEvent); - - // Remove the UPDATE event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } - } else if (RowMapper.UPDATE == prevEvent.getOperation()) { - // Previous event is an UPDATE operation. - // This will happen if there are non-CLOB and inline-CLOB fields updated in the same - // SQL. - // The inline-CLOB values should be merged with the previous UPDATE event. - if (RowMapper.UPDATE == event.getOperation()) { - if (isForSameTableOrScn(event, prevEvent) && isSameTableRow(event, prevEvent)) { - LOGGER.trace("\tMerging UPDATE event with previous UPDATE event"); - mergeNewColumns(event, prevEvent); - - // Remove the UPDATE event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } - } else if (RowMapper.SELECT_LOB_LOCATOR == prevEvent.getOperation()) { - // Previous event is a SEL_LOB_LOCATOR operation. - // SQL contained both non-inline CLOB and inline-CLOB field changes. - if (RowMapper.UPDATE == event.getOperation()) { - if (isForSameTableOrScn(event, prevEvent) && isSameTableRow(event, prevEvent)) { - LOGGER.trace("\tMerging UPDATE event with previous SEL_LOB_LOCATOR event"); - for (int i = 0; i < event.getEntry().getNewValues().length; ++i) { - Object value = event.getEntry().getNewValues()[i]; - Object prevValue = prevEvent.getEntry().getNewValues()[i]; - if (prevValue == null && value != null) { - LOGGER.trace( - "\tAdding column index {} to previous SEL_LOB_LOCATOR event", - i); - prevEvent.getEntry().getNewValues()[i] = value; - } - } - - // Remove the UPDATE event from event list and indicate merged. - transaction.events.remove(index); - return true; - } - } - } - - LOGGER.trace( - "\tDmlEvent {} event is for different row, merge skipped.", event.getOperation()); - return false; - } - - /** - * Reads the transaction event queue and combines all LOB_WRITE events starting at the provided - * index. for a SEL_LOB_LOCATOR event which is for binary data (BLOB) data types. 
- * - * @param transaction transaction being processed, never {@code null} - * @param index index to the first LOB_WRITE operation - * @return list of string-based values for each LOB_WRITE operation - */ - private List readAndCombineLobWriteEvents( - Transaction transaction, int index, boolean binaryData) { - List chunks = new ArrayList<>(); - for (int i = index + 1; i < transaction.events.size(); ++i) { - final LogMinerEvent event = transaction.events.get(i); - if (!(event instanceof LobWriteEvent)) { - break; - } - - final LobWriteEvent writeEvent = (LobWriteEvent) event; - if (binaryData - && !writeEvent.getData().startsWith("HEXTORAW('") - && !writeEvent.getData().endsWith("')")) { - throw new DebeziumException("Unexpected BLOB data chunk: " + writeEvent.getData()); - } - - chunks.add(writeEvent.getData()); - } - - if (!chunks.isEmpty()) { - LOGGER.trace("\tCombined {} LobWriteEvent events", chunks.size()); - // Remove events from the transaction queue queue - for (int i = 0; i < chunks.size(); ++i) { - transaction.events.remove(index + 1); - } - } - - return chunks; - } - - /** - * Read and remove all LobErase events detected in the transaction event queue. - * - * @param transaction transaction being processed, never {@code null} - * @param index index to the first LOB_ERASE operation - * @return number of LOB_ERASE events consumed and removed from the event queue - */ - private int readAndConsumeLobEraseEvents(Transaction transaction, int index) { - int events = 0; - for (int i = index + 1; i < transaction.events.size(); ++i) { - final LogMinerEvent event = transaction.events.get(i); - if (!(event instanceof LobEraseEvent)) { - break; - } - events++; - } - - if (events > 0) { - LOGGER.trace("\tConsumed {} LobErase events", events); - for (int i = 0; i < events; ++i) { - transaction.events.remove(index + 1); - } - } - - return events; - } - - /** - * Checks whether the two events are for the same table or participate in the same system - * change. - * - * @param event current event being processed, never {@code null} - * @param prevEvent previous/parent event that has been processed, may be {@code null} - * @return true if the two events are for the same table or system change number, false - * otherwise - */ - private boolean isForSameTableOrScn(LogMinerEvent event, LogMinerEvent prevEvent) { - if (prevEvent != null) { - if (event.getTableId().equals(prevEvent.getTableId())) { - return true; - } - return event.getScn().equals(prevEvent.getScn()) - && event.getRsId().equals(prevEvent.getRsId()); - } - return false; - } - - /** - * Checks whether the two events are for the same table row. 
- * - * @param event current event being processed, never {@code null} - * @param prevEvent previous/parent event that has been processed, never {@code null} - * @return true if the two events are for the same table row, false otherwise - */ - private boolean isSameTableRow(LogMinerEvent event, LogMinerEvent prevEvent) { - final Table table = schema.tableFor(event.getTableId()); - if (table == null) { - LOGGER.trace( - "Unable to locate table '{}' schema, unable to detect if same row.", - event.getTableId()); - return false; - } - for (String columnName : table.primaryKeyColumnNames()) { - int position = LogMinerHelper.getColumnIndexByName(columnName, table); - Object prevValue = prevEvent.getEntry().getNewValues()[position]; - if (prevValue == null) { - throw new DebeziumException( - "Could not find column " + columnName + " in previous event"); - } - Object value = event.getEntry().getNewValues()[position]; - if (value == null) { - throw new DebeziumException("Could not find column " + columnName + " in event"); - } - if (!Objects.equals(value, prevValue)) { - return false; - } - } - return true; - } - - /** - * Merge column values from {@code event} with {@code prevEvent}. - * - * @param event current event being processed, never {@code null} - * @param prevEvent previous/parent parent that has been processed, never {@code null} - */ - private void mergeNewColumns(LogMinerEvent event, LogMinerEvent prevEvent) { - final boolean prevEventIsInsert = RowMapper.INSERT == prevEvent.getOperation(); - - for (int i = 0; i < event.getEntry().getNewValues().length; ++i) { - Object value = event.getEntry().getNewValues()[i]; - Object prevValue = prevEvent.getEntry().getNewValues()[i]; - if (prevEventIsInsert && "EMPTY_CLOB()".equals(prevValue)) { - LOGGER.trace("\tAssigning column index {} with updated CLOB value.", i); - prevEvent.getEntry().getNewValues()[i] = value; - } else if (prevEventIsInsert && "EMPTY_BLOB()".equals(prevValue)) { - LOGGER.trace("\tAssigning column index {} with updated BLOB value.", i); - prevEvent.getEntry().getNewValues()[i] = value; - } else if (!prevEventIsInsert && value != null) { - LOGGER.trace("\tUpdating column index {} in previous event", i); - prevEvent.getEntry().getNewValues()[i] = value; - } - } - } - - /** - * Represents a transaction boundary that was recently committed. - * - *

This is used by the buffer to detect transactions read from overlapping mining sessions - * that can safely be ignored as the connector has already reconciled and emitted the event for - * it. - */ - private static final class RecentlyCommittedTransaction { - private final String transactionId; - private final Scn firstScn; - private final Scn commitScn; - - public RecentlyCommittedTransaction(Transaction transaction, Scn commitScn) { - this.transactionId = transaction.transactionId; - this.firstScn = transaction.firstScn; - this.commitScn = commitScn; - } - - public Scn getFirstScn() { - return firstScn; - } - - public Scn getCommitScn() { - return commitScn; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RecentlyCommittedTransaction that = (RecentlyCommittedTransaction) o; - return Objects.equals(transactionId, that.transactionId) - && Objects.equals(firstScn, that.firstScn) - && Objects.equals(commitScn, that.commitScn); - } - - @Override - public int hashCode() { - return Objects.hash(transactionId, firstScn, commitScn); - } - } - - /** Represents a logical database transaction. */ - private static final class Transaction { - - private final String transactionId; - private final Scn firstScn; - private Scn lastScn; - private final List events; - private int eventIds; - - private Transaction(String transactionId, Scn firstScn) { - this.transactionId = transactionId; - this.firstScn = firstScn; - this.events = new ArrayList<>(); - this.lastScn = firstScn; - this.eventIds = 0; - } - - @Override - public String toString() { - return "Transaction{" - + "transactionId=" - + transactionId - + ", firstScn=" - + firstScn - + ", lastScn=" - + lastScn - + ", eventIds=" - + eventIds - + '}'; - } - } - - /** Base class for all possible LogMiner events. */ - private static class LogMinerEvent { - private final int operation; - private final LogMinerDmlEntry entry; - private final Scn scn; - private final TableId tableId; - private final String rowId; - private final Object rsId; - - public LogMinerEvent( - int operation, - LogMinerDmlEntry entry, - Scn scn, - TableId tableId, - String rowId, - Object rsId) { - this.operation = operation; - this.scn = scn; - this.tableId = tableId; - this.rowId = rowId; - this.rsId = rsId; - this.entry = entry; - } - - public int getOperation() { - return operation; - } - - public LogMinerDmlEntry getEntry() { - return entry; - } - - public Scn getScn() { - return scn; - } - - public TableId getTableId() { - return tableId; - } - - public String getRowId() { - return rowId; - } - - public Object getRsId() { - return rsId; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - LogMinerEvent event = (LogMinerEvent) o; - return operation == event.operation - && Objects.equals(entry, event.entry) - && Objects.equals(scn, event.scn) - && Objects.equals(tableId, event.tableId) - && Objects.equals(rowId, event.rowId) - && Objects.equals(rsId, event.rsId); - } - - @Override - public int hashCode() { - return Objects.hash(operation, entry, scn, tableId, rowId, rsId); - } - } - - /** Represents a DML event for a given table row. 
*/ - private static class DmlEvent extends LogMinerEvent { - public DmlEvent( - int operation, - LogMinerDmlEntry entry, - Scn scn, - TableId tableId, - String rowId, - Object rsId) { - super(operation, entry, scn, tableId, rowId, rsId); - } - } - - /** Represents a SELECT_LOB_LOCATOR event. */ - private static class SelectLobLocatorEvent extends LogMinerEvent { - private final String columnName; - private final boolean binaryData; - - public SelectLobLocatorEvent( - int operation, - LogMinerDmlEntry entry, - String columnName, - boolean binaryData, - Scn scn, - TableId tableId, - String rowId, - Object rsId) { - super(operation, entry, scn, tableId, rowId, rsId); - this.columnName = columnName; - this.binaryData = binaryData; - } - - public String getColumnName() { - return columnName; - } - - public boolean isBinaryData() { - return binaryData; - } - } - - /** Represents a LOB_WRITE event. */ - private static class LobWriteEvent extends LogMinerEvent { - private final String data; - - public LobWriteEvent( - int operation, String data, Scn scn, TableId tableId, String rowId, Object rsId) { - super(operation, null, scn, tableId, rowId, rsId); - this.data = data; - } - - public String getData() { - return data; - } - } - - /** Represents a LOB_ERASE event. */ - private static class LobEraseEvent extends LogMinerEvent { - public LobEraseEvent(int operation, Scn scn, TableId tableId, String rowId, Object rsId) { - super(operation, null, scn, tableId, rowId, rsId); - } - } -} diff --git a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/OracleSourceTest.java b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/OracleSourceTest.java index 1de4b34e203..2dbbb508684 100644 --- a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/OracleSourceTest.java +++ b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/OracleSourceTest.java @@ -575,6 +575,8 @@ private OracleSource.Builder basicSourceBuilder(OracleContainer or Properties debeziumProperties = new Properties(); debeziumProperties.setProperty("debezium.log.mining.strategy", "online_catalog"); debeziumProperties.setProperty("debezium.log.mining.continuous.mine", "true"); + // ignore APEX XE system tables changes + debeziumProperties.setProperty("database.history.store.only.captured.tables.ddl", "true"); return OracleSource.builder() .hostname(oracleContainer.getHost()) .port(oracleContainer.getOraclePort()) diff --git a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/source/OracleSourceITCase.java b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/source/OracleSourceITCase.java index 1faad375382..49719fe7769 100644 --- a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/source/OracleSourceITCase.java +++ b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/source/OracleSourceITCase.java @@ -146,7 +146,8 @@ private void testOracleParallelSource( + " 'table-name' = '%s'," + " 'scan.incremental.snapshot.enabled' = 'false'," + " 'debezium.log.mining.strategy' = 'online_catalog'," - + " 'debezium.log.mining.continuous.mine' = 'true'" + + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'" + ")", ORACLE_CONTAINER.getHost(), ORACLE_CONTAINER.getOraclePort(), diff --git 
a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/table/OracleConnectorITCase.java b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/table/OracleConnectorITCase.java index 96ae27d6f71..077f9d15531 100644 --- a/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/table/OracleConnectorITCase.java +++ b/flink-connector-oracle-cdc/src/test/java/com/ververica/cdc/connectors/oracle/table/OracleConnectorITCase.java @@ -23,8 +23,12 @@ import org.apache.flink.table.planner.factories.TestValuesTableFactory; import org.apache.flink.test.util.AbstractTestBase; +import org.apache.flink.shaded.guava30.com.google.common.collect.Lists; +import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; + import com.ververica.cdc.connectors.oracle.utils.OracleTestUtils; import org.junit.After; +import org.junit.Assume; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; @@ -44,7 +48,15 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.util.stream.Stream; import static com.ververica.cdc.connectors.oracle.source.OracleSourceTestBase.assertEqualsInAnyOrder; @@ -57,6 +69,8 @@ /** Integration tests for Oracle binlog SQL source. */ @RunWith(Parameterized.class) public class OracleConnectorITCase extends AbstractTestBase { + private static final int RECORDS_COUNT = 10_000; + private static final int WORKERS_COUNT = 4; private static final Logger LOG = LoggerFactory.getLogger(OracleConnectorITCase.class); @@ -122,6 +136,7 @@ public void testConsumingAllEvents() + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'scan.incremental.snapshot.chunk.size' = '2'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," @@ -231,6 +246,7 @@ public void testConsumingAllEventsByChunkKeyColumn() + " 'scan.incremental.snapshot.chunk.key-column' = 'ID'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'scan.incremental.snapshot.chunk.size' = '2'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," @@ -321,6 +337,8 @@ public void testMetadataColumns() throws Throwable { + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + // + " 'debezium.database.history.store.only.captured.tables.ddl' = + // 'true'," + " 'scan.incremental.snapshot.chunk.size' = '2'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," @@ -417,6 +435,7 @@ public void testStartupFromLatestOffset() throws Exception { + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'database-name' = 'XE'," + " 'schema-name' = 
'%s'," + " 'table-name' = '%s' ," @@ -516,6 +535,7 @@ public void testConsumingNumericColumns() throws Exception { + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," + " 'table-name' = '%s'" @@ -598,6 +618,7 @@ public void testXmlType() throws Exception { + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'scan.incremental.snapshot.chunk.size' = '2'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," @@ -708,6 +729,7 @@ public void testAllDataTypes() throws Throwable { + " 'scan.incremental.snapshot.enabled' = '%s'," + " 'debezium.log.mining.strategy' = 'online_catalog'," + " 'debezium.log.mining.continuous.mine' = 'true'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + " 'scan.incremental.snapshot.chunk.size' = '2'," + " 'database-name' = 'XE'," + " 'schema-name' = '%s'," @@ -765,6 +787,134 @@ public void testAllDataTypes() throws Throwable { result.getJobClient().get().cancel().get(); } + @Test + public void testSnapshotToStreamingSwitchPendingTransactions() throws Exception { + Assume.assumeFalse(parallelismSnapshot); + + CompletableFuture finishFuture = createRecordInserters(); + + String sourceDDL = + String.format( + "CREATE TABLE messages (" + + " ID INT NOT NULL," + + " CATEGORY_NAME STRING" + + ") WITH (" + + " 'connector' = 'oracle-cdc'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'username' = '%s'," + + " 'password' = '%s'," + + " 'database-name' = '%s'," + + " 'schema-name' = '%s'," + + " 'table-name' = 'category'," + + " 'scan.incremental.snapshot.enabled' = 'false'," + + " 'debezium.log.mining.strategy' = 'online_catalog'," + + " 'debezium.database.history.store.only.captured.tables.ddl' = 'true'," + + " 'debezium.log.mining.continuous.mine' = 'true'" + + ")", + oracleContainer.getHost(), + oracleContainer.getOraclePort(), + "dbzuser", + "dbz", + "XE", + "debezium"); + + String sinkDDL = + "CREATE TABLE sink (" + + " ID INT," + + " message STRING" + + ") WITH (" + + " 'connector' = 'values'," + + " 'sink-insert-only' = 'false'" + + ")"; + tEnv.executeSql(sourceDDL); + tEnv.executeSql(sinkDDL); + + TableResult result = tEnv.executeSql("INSERT INTO sink SELECT * FROM messages"); + + finishFuture.get(10, TimeUnit.MINUTES); + LOG.info("all async runners were finished"); + + waitForSinkSize("sink", RECORDS_COUNT); + + List actual = + TestValuesTableFactory.getResults("sink").stream() + .map(s -> s.replaceFirst("\\+I\\[(\\d+).+", "$1")) + .map(Integer::parseInt) + .sorted() + .collect(Collectors.toList()); + + List expected = + IntStream.range(0, RECORDS_COUNT).boxed().collect(Collectors.toList()); + + assertEquals(expected, actual); + result.getJobClient().get().cancel().get(); + } + + @SuppressWarnings("unchecked") + private CompletableFuture createRecordInserters() { + int requestPerSecondPerThread = 100; + int recordsChunkSize = RECORDS_COUNT / WORKERS_COUNT; + int recordsToCommit = recordsChunkSize / 4; + + List runners = + IntStream.range(0, WORKERS_COUNT) + .mapToObj( + i -> + createRecordInserter( + requestPerSecondPerThread, + recordsChunkSize * i, + recordsChunkSize, + recordsToCommit)) + 
.collect(Collectors.toList()); + + ExecutorService executor = Executors.newFixedThreadPool(WORKERS_COUNT); + CompletableFuture[] completableFutures = + runners.stream() + .map(runnable -> CompletableFuture.runAsync(runnable, executor)) + .toArray(CompletableFuture[]::new); + + return CompletableFuture.allOf(completableFutures); + } + + private Runnable createRecordInserter( + int requestPerSecond, int startIndex, int recordsCnt, int recordsToCommit) { + return () -> { + Supplier messageSupplier = + createRandomSupplier( + Lists.newArrayList("msg1", "msg2", "msg3", "msg4", "msg5", "msg6")); + + RateLimiter rateLimiter = RateLimiter.create(requestPerSecond); + + try (Connection connection = getJdbcConnection(); + Statement statement = connection.createStatement()) { + + connection.setAutoCommit(false); + for (long i = startIndex; i < startIndex + recordsCnt; i++) { + rateLimiter.acquire(); + statement.execute( + String.format( + "INSERT INTO %s.%s VALUES (%d,'%s')", + "debezium", "category", i, messageSupplier.get())); + if (i % recordsToCommit == 0) { + LOG.info("Committing at id {}", i); + connection.commit(); + } + } + + connection.commit(); + } catch (SQLException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + }; + } + + private Supplier createRandomSupplier(List possibleValues) { + int size = possibleValues.size(); + return () -> possibleValues.get(ThreadLocalRandom.current().nextInt(size)); + } + // ------------------------------------------------------------------------------------ private static void waitForSnapshotStarted(String sinkName) throws InterruptedException { diff --git a/flink-connector-postgres-cdc/src/main/java/io/debezium/connector/postgresql/connection/Lsn.java b/flink-connector-postgres-cdc/src/main/java/io/debezium/connector/postgresql/connection/Lsn.java new file mode 100644 index 00000000000..deb7bb75424 --- /dev/null +++ b/flink-connector-postgres-cdc/src/main/java/io/debezium/connector/postgresql/connection/Lsn.java @@ -0,0 +1,149 @@ +/* + * Copyright Debezium Authors. + * + * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.debezium.connector.postgresql.connection; + +import org.postgresql.replication.LogSequenceNumber; + +import java.nio.ByteBuffer; + +/** + * Copied from Debezium 1.9.7 without changes due to the NoSuchMethodError, caused by the fact that + * current Debezium release java version is 11, so we need to compile this file by java 8 compiler. + * More + * info. Abstraction of PostgreSQL log sequence number, adapted from {@link LogSequenceNumber}. + */ +public class Lsn implements Comparable { + + /** + * Zero is used indicate an invalid pointer. Bootstrap skips the first possible WAL segment, + * initializing the first WAL page at XLOG_SEG_SIZE, so no XLOG record can begin at zero. 
+ */ + public static final Lsn INVALID_LSN = new Lsn(0); + + private final long value; + + private Lsn(long value) { + this.value = value; + } + + /** + * @param value numeric represent position in the write-ahead log stream + * @return not null LSN instance + */ + public static Lsn valueOf(Long value) { + if (value == null) { + return null; + } + if (value == 0) { + return INVALID_LSN; + } + return new Lsn(value); + } + + /** + * @param value PostgreSQL JDBC driver domain type representing position in the write-ahead log + * stream + * @return not null LSN instance + */ + public static Lsn valueOf(LogSequenceNumber value) { + if (value.asLong() == 0) { + return INVALID_LSN; + } + return new Lsn(value.asLong()); + } + + /** + * Create LSN instance by string represent LSN. + * + * @param strValue not null string as two hexadecimal numbers of up to 8 digits each, separated + * by a slash. For example {@code 16/3002D50}, {@code 0/15D68C50} + * @return not null LSN instance where if specified string represent have not valid form {@link + * Lsn#INVALID_LSN} + */ + public static Lsn valueOf(String strValue) { + final int slashIndex = strValue.lastIndexOf('/'); + + if (slashIndex <= 0) { + return INVALID_LSN; + } + + final String logicalXLogStr = strValue.substring(0, slashIndex); + final int logicalXlog = (int) Long.parseLong(logicalXLogStr, 16); + final String segmentStr = strValue.substring(slashIndex + 1, strValue.length()); + final int segment = (int) Long.parseLong(segmentStr, 16); + + final ByteBuffer buf = ByteBuffer.allocate(8); + buf.putInt(logicalXlog); + buf.putInt(segment); + buf.position(0); + final long value = buf.getLong(); + + return Lsn.valueOf(value); + } + + /** @return Long represent position in the write-ahead log stream */ + public long asLong() { + return value; + } + + /** @return PostgreSQL JDBC driver representation of position in the write-ahead log stream */ + public LogSequenceNumber asLogSequenceNumber() { + return LogSequenceNumber.valueOf(value); + } + + /** + * @return String represent position in the write-ahead log stream as two hexadecimal numbers of + * up to 8 digits each, separated by a slash. For example {@code 16/3002D50}, {@code + * 0/15D68C50} + */ + public String asString() { + final ByteBuffer buf = ByteBuffer.allocate(8); + buf.putLong(value); + buf.position(0); + + final int logicalXlog = buf.getInt(); + final int segment = buf.getInt(); + return String.format("%X/%X", logicalXlog, segment); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + final Lsn that = (Lsn) o; + + return value == that.value; + } + + @Override + public int hashCode() { + return (int) (value ^ (value >>> 32)); + } + + public boolean isValid() { + return this != INVALID_LSN; + } + + @Override + public String toString() { + return "LSN{" + asString() + '}'; + } + + @Override + public int compareTo(Lsn o) { + if (value == o.value) { + return 0; + } + // Unsigned comparison + return value + Long.MIN_VALUE < o.value + Long.MIN_VALUE ? 
-1 : 1; + } +} diff --git a/flink-connector-postgres-cdc/src/test/java/com/ververica/cdc/connectors/postgres/table/PostgreSQLConnectorITCase.java b/flink-connector-postgres-cdc/src/test/java/com/ververica/cdc/connectors/postgres/table/PostgreSQLConnectorITCase.java index 1df755ee221..6d47ba77f6b 100644 --- a/flink-connector-postgres-cdc/src/test/java/com/ververica/cdc/connectors/postgres/table/PostgreSQLConnectorITCase.java +++ b/flink-connector-postgres-cdc/src/test/java/com/ververica/cdc/connectors/postgres/table/PostgreSQLConnectorITCase.java @@ -443,7 +443,6 @@ public void testMetadataColumns() throws Throwable { Collections.sort(expected); assertEquals(expected, actual); result.getJobClient().get().cancel().get(); - Thread.sleep(1 * 1000); } @Test diff --git a/pom.xml b/pom.xml index 6f1194f3d2c..b5ea861017c 100644 --- a/pom.xml +++ b/pom.xml @@ -74,7 +74,7 @@ under the License. 1.17.0 - 1.9.2.Final + 1.9.7.Final 3.2.0 2.2.0
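For reference, a minimal usage sketch of the Lsn class copied in above (not part of the patch; the wrapper class name LsnSketch and the sample LSN values are illustrative only, and only methods that appear in the copied file are used):

import io.debezium.connector.postgresql.connection.Lsn;

public class LsnSketch {
    public static void main(String[] args) {
        // Parse the textual "xlog/segment" form that PostgreSQL reports, e.g. from pg_current_wal_lsn().
        Lsn a = Lsn.valueOf("0/15D68C50");
        Lsn b = Lsn.valueOf("16/3002D50");

        // asString() reverses valueOf(String): both 32-bit halves are rendered back as hex.
        System.out.println(a.asString());   // 0/15D68C50

        // compareTo() orders LSNs as unsigned 64-bit longs, so b sorts after a.
        System.out.println(a.compareTo(b)); // -1

        // A string without a '/' separator maps to the INVALID_LSN sentinel instead of throwing.
        System.out.println(Lsn.valueOf("not-an-lsn").isValid()); // false
    }
}

The shift by Long.MIN_VALUE in compareTo is the usual unsigned-comparison trick: an LSN whose high bit is set would sort before smaller values under a plain signed comparison, whereas adding Long.MIN_VALUE to both operands preserves unsigned order.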