diff --git a/airbyte-integrations/connectors/source-postgres/acceptance-test-config.yml b/airbyte-integrations/connectors/source-postgres/acceptance-test-config.yml index bd4d94516f7c..ba2bf722045d 100644 --- a/airbyte-integrations/connectors/source-postgres/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-postgres/acceptance-test-config.yml @@ -35,13 +35,3 @@ acceptance_tests: tests: - config_path: "secrets/config.json" - config_path: "secrets/config_cdc.json" - incremental: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/incremental_configured_catalog.json" - future_state: - bypass_reason: "A java.lang.NullPointerException is thrown when a state with an invalid cursor value is passed" - - config_path: "secrets/config_cdc.json" - configured_catalog_path: "integration_tests/incremental_configured_catalog.json" - future_state: - bypass_reason: "A java.lang.NullPointerException is thrown when a state with an invalid cursor value is passed" diff --git a/airbyte-integrations/connectors/source-postgres/metadata.yaml b/airbyte-integrations/connectors/source-postgres/metadata.yaml index 8164fda60846..4de2eeb64026 100644 --- a/airbyte-integrations/connectors/source-postgres/metadata.yaml +++ b/airbyte-integrations/connectors/source-postgres/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: source definitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 - dockerImageTag: 3.4.10 + dockerImageTag: 3.4.11 dockerRepository: airbyte/source-postgres documentationUrl: https://docs.airbyte.com/integrations/sources/postgres githubIssueLabel: source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json b/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json index e1cc6ff8b367..663811e38bb3 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json +++ 
b/airbyte-integrations/connectors/source-postgres/src/main/resources/spec.json @@ -309,7 +309,7 @@ }, { "title": "Detect Changes with Xmin System Column", - "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. Only recommended for tables up to 500GB.", + "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. Suitable for databases that have low transaction pressure.", "required": ["method"], "properties": { "method": { diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_cloud_deployment_spec.json b/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_cloud_deployment_spec.json index 1243163310b0..6ce55ffb5225 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_cloud_deployment_spec.json +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_cloud_deployment_spec.json @@ -310,7 +310,7 @@ }, { "title": "Detect Changes with Xmin System Column", - "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. Only recommended for tables up to 500GB.", + "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. 
Suitable for databases that have low transaction pressure.", "required": ["method"], "properties": { "method": { diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_spec.json b/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_spec.json index 040446878181..2a68a0097326 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_spec.json +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/resources/expected_spec.json @@ -309,7 +309,7 @@ }, { "title": "Detect Changes with Xmin System Column", - "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. Only recommended for tables up to 500GB.", + "description": "Recommended - Incrementally reads new inserts and updates via Postgres Xmin system column. Suitable for databases that have low transaction pressure.", "required": ["method"], "properties": { "method": { diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 4e2e6640a690..49e4876d412f 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -191,8 +191,7 @@ This is a good solution if: - There is not a well-defined cursor candidate to use for Standard incremental mode. - You want to replace a previously configured full-refresh sync. -- You are replicating Postgres tables less than 500GB. -- Your database doesn't incur heavy writes that would lead to transaction ID wrap around +- Your database doesn't incur heavy writes that would lead to transaction ID wraparound. - You are not replicating non-materialized views. Non-materialized views are not supported by xmin replication. 
## Connecting with SSL or SSH Tunneling @@ -312,7 +311,8 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 3.4.10 | 2024-05-29 | [38584](https://github.com/airbytehq/airbyte/pull/38584) | Set is_resumable flag in discover. | +| 3.4.11 | 2024-06-04 | [38848](https://github.com/airbytehq/airbyte/pull/38848) | Improve UI message and doc on xmin | +| 3.4.10 | 2024-05-29 | [38584](https://github.com/airbytehq/airbyte/pull/38584) | Set is_resumable flag in discover. | | 3.4.9 | 2024-05-29 | [38775](https://github.com/airbytehq/airbyte/pull/38775) | Publish CDK | | 3.4.9 | 2024-05-28 | [38716](https://github.com/airbytehq/airbyte/pull/38716) | Publish CDK | | 3.4.8 | 2024-05-28 | [38716](https://github.com/airbytehq/airbyte/pull/38716) | Stream status for postgres | @@ -559,4 +559,4 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp | 0.1.5 | 2020-11-30 | [1038](https://github.com/airbytehq/airbyte/pull/1038) | Change JDBC sources to discover more than standard schemas | | 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | - \ No newline at end of file + diff --git a/docs/integrations/sources/postgres/postgres-troubleshooting.md b/docs/integrations/sources/postgres/postgres-troubleshooting.md index b1f76ad4a90a..dfe26312a53b 100644 --- a/docs/integrations/sources/postgres/postgres-troubleshooting.md +++ b/docs/integrations/sources/postgres/postgres-troubleshooting.md @@ -12,6 +12,13 @@ - Resetting a single table within the connection without resetting the rest of the destination tables in that connection - Changing a column data type or 
removing a column might break connections. +### Xmin Limitations +There are some notable shortcomings associated with the Xmin replication method: +- Unsupported DML operations: This replication method cannot capture row deletions. +- Performance: Requires a full table scan, so can lead to poor performance. +- Row-level granularity: The xmin column is stored at the row level. This means that a row will still be synced if it has been modified, regardless of whether the modification corresponded to the subset of columns the user is interested in. +- Transaction ID (XID) wraparound: The transaction ID (aka xid) is represented by a 32-bit integer and has an upper limit value of 4,294,967,295. Once this value is reached, the xid wraps around and stops increasing monotonically. At this point, the xmin column cannot be reliably used as a cursor, which can lead to resyncing data that has already been synced. Also see the troubleshooting section on Xmin wraparound below. + ### Version Requirements - For Airbyte Open Source users, [upgrade](https://docs.airbyte.com/operator-guides/upgrading-airbyte/) your Airbyte platform to version `v0.58.0` or newer @@ -111,6 +118,10 @@ The root causes is that the WALs needed for the incremental sync has been remove Some larger tables may encounter an error related to the temporary file size limit such as `temporary file size exceeds temp_file_limit`. To correct this error increase the [temp_file_limit](https://postgresqlco.nf/doc/en/param/temp_file_limit/). +### Xmin Wraparound + +When a database experiences Xmin wraparound, the replication performance will be degraded. Furthermore, data that has already been synced may be resynced. When setting up a Postgres source connector or at the beginning of the sync, the connector will check if an Xmin wraparound exists. If so, the connector returns a config error, reminding the user to switch to the CDC replication method. 
+ ### (Advanced) Custom JDBC Connection Strings To customize the JDBC connection beyond common options, specify additional supported [JDBC URL parameters](https://jdbc.postgresql.org/documentation/head/connect.html) as key-value pairs separated by the symbol & in the **JDBC URL Parameters (Advanced)** field.