Skip to content

Conversation

@sean-rose
Copy link
Contributor

@sean-rose sean-rose commented Nov 20, 2024

Description

BigQueryTablePartitionExistenceSensor has a bug in deferrable mode that causes the secondary queries it runs to detect partitions to fail (e.g. bug 1932180).

This is being fixed in apache/airflow#44225. This PR should ultimately be reverted once that bugfix has been merged and included in an apache-airflow-providers-google release, and telemetry-airflow has been upgraded to that release.

Related Tickets & Documents

Reviewer, please follow this checklist

┆Issue is synchronized with this Jira Task

… reschedule mode.

Those sensors have a bug in deferrable mode that causes the secondary queries they run to detect partitions to fail; this is being fixed in apache/airflow#44225.
poke_interval={{ table_partition_sensor_task.poke_interval | format_timedelta | format_repr }},
{% else -%}
poke_interval=datetime.timedelta(minutes=5),
poke_interval=datetime.timedelta(minutes=15),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I increased the default poke interval because starting up a Kubernetes pod every time the sensor runs is more resource-intensive than running the sensor in deferrable mode (I think it takes around a minute to start each pod).

@dataops-ci-bot
Copy link

Integration report for "Fix test_dag_with_bigquery_table_sensors."

sql.diff

Click to expand!
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_search_console.py /tmp/workspace/generated-sql/dags/bqetl_google_search_console.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_google_search_console.py	2024-11-20 21:16:43.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_google_search_console.py	2024-11-20 21:18:41.000000000 +0000
@@ -67,8 +67,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -81,8 +82,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -95,8 +97,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -109,8 +112,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -123,8 +127,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -137,8 +142,9 @@
             table_id="searchdata_url_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -151,8 +157,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -165,8 +172,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -179,8 +187,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -193,8 +202,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -207,8 +217,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )
@@ -221,8 +232,9 @@
             table_id="searchdata_site_impression",
             partition_id="{{ data_interval_start.subtract(days=1) | ds_nodash }}",
             gcp_conn_id="google_cloud_shared_prod",
-            deferrable=True,
-            poke_interval=datetime.timedelta(minutes=5),
+            deferrable=False,
+            mode="reschedule",
+            poke_interval=datetime.timedelta(minutes=15),
             timeout=datetime.timedelta(hours=8),
         )
     )

Link to full diff

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants