Skip to content

Commit f32029b

Browse files
committed
graph, store: Add GRAPH_STORE_SETUP_TIMEOUT for database setup
Add a separate timeout for database setup operations (migrations, schema creation, FDW configuration) which can legitimately take longer than normal runtime operations. This fixes timeout issues during runner tests when using the default 5s GRAPH_STORE_CONNECTION_TIMEOUT. The new GRAPH_STORE_SETUP_TIMEOUT defaults to 30s and is used only during database initialization via the new get_for_setup() method, preserving the fast-fail behavior (5s) for production runtime operations.
1 parent aaea83f commit f32029b

File tree

4 files changed

+34
-9
lines changed

4 files changed

+34
-9
lines changed

docs/environment-variables.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ those.
216216
decisions. Set to `true` to turn simulation on, defaults to `false`
217217
- `GRAPH_STORE_CONNECTION_TIMEOUT`: How long to wait, in milliseconds, to connect to a
218218
database before assuming the database is down. Defaults to 5000ms.
219+
- `GRAPH_STORE_SETUP_TIMEOUT`: How long to wait for a database connection during
220+
setup operations (migrations, schema creation, FDW configuration), in milliseconds. Defaults to 30000ms (30s).
221+
Setup operations can legitimately take longer than normal runtime operations.
219222
- `GRAPH_STORE_CONNECTION_UNAVAILABLE_RETRY`: When a database shard is marked
220223
unavailable due to connection timeouts, this controls how often to allow a
221224
single probe request through to check if the database has recovered. Only one

graph/src/env/store.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ pub struct EnvVarsStore {
6363
/// Set by the environment variable `GRAPH_STORE_CONNECTION_TIMEOUT` (expressed
6464
/// in milliseconds). The default value is 5000ms.
6565
pub connection_timeout: Duration,
66+
/// Set by `GRAPH_STORE_SETUP_TIMEOUT` (in milliseconds). Default: 30000ms.
67+
/// Used during database setup (migrations, schema creation) which can
68+
/// legitimately take longer than normal operations.
69+
pub setup_timeout: Duration,
6670
/// Set by the environment variable `GRAPH_STORE_CONNECTION_MIN_IDLE`. No
6771
/// default value is provided.
6872
pub connection_min_idle: Option<u32>,
@@ -214,6 +218,7 @@ impl TryFrom<InnerStore> for EnvVarsStore {
214218
),
215219
recent_blocks_cache_capacity: x.recent_blocks_cache_capacity,
216220
connection_timeout: Duration::from_millis(x.connection_timeout_in_millis),
221+
setup_timeout: Duration::from_millis(x.setup_timeout_in_millis),
217222
connection_min_idle: x.connection_min_idle,
218223
connection_idle_timeout: Duration::from_secs(x.connection_idle_timeout_in_secs),
219224
write_queue_size: x.write_queue_size,
@@ -299,6 +304,8 @@ pub struct InnerStore {
299304
// configured differently for each pool.
300305
#[envconfig(from = "GRAPH_STORE_CONNECTION_TIMEOUT", default = "5000")]
301306
connection_timeout_in_millis: u64,
307+
#[envconfig(from = "GRAPH_STORE_SETUP_TIMEOUT", default = "30000")]
308+
setup_timeout_in_millis: u64,
302309
#[envconfig(from = "GRAPH_STORE_CONNECTION_MIN_IDLE")]
303310
connection_min_idle: Option<u32>,
304311
#[envconfig(from = "GRAPH_STORE_CONNECTION_IDLE_TIMEOUT", default = "600")]

store/postgres/src/pool/coordinator.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ impl PoolCoordinator {
265265

266266
let primary = self.primary()?;
267267

268-
let mut pconn = primary.get().await?;
268+
let mut pconn = primary.get_for_setup().await?;
269269

270270
let states: Vec<_> = states
271271
.into_iter()

store/postgres/src/pool/mod.rs

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,18 @@ impl PoolInner {
620620
self.get_from_pool(&self.pool, None, Duration::ZERO).await
621621
}
622622

623+
/// Get a connection using the setup timeout. Use only during database
624+
/// initialization where operations can legitimately take longer.
625+
async fn get_for_setup(&self) -> Result<AsyncPgConnection, StoreError> {
626+
let setup_timeouts = Timeouts {
627+
wait: Some(ENV_VARS.store.setup_timeout),
628+
create: Some(ENV_VARS.store.setup_timeout),
629+
recycle: Some(ENV_VARS.store.setup_timeout),
630+
};
631+
self.get_from_pool(&self.pool, Some(setup_timeouts), Duration::ZERO)
632+
.await
633+
}
634+
623635
/// Get the pool for fdw connections. It is an error if none is configured
624636
fn fdw_pool(&self, logger: &Logger) -> Result<&AsyncPool, StoreError> {
625637
let pool = match &self.fdw_pool {
@@ -701,7 +713,7 @@ impl PoolInner {
701713
}
702714

703715
async fn locale_check(&self, logger: &Logger) -> Result<(), StoreError> {
704-
let mut conn = self.get().await?;
716+
let mut conn = self.get_for_setup().await?;
705717
let _: () = if let Err(msg) = catalog::Locale::load(&mut conn).await?.suitable() {
706718
if self.shard == *PRIMARY_SHARD && primary::is_empty(&mut conn).await? {
707719
const MSG: &str = "Database does not use C locale. \
@@ -751,7 +763,7 @@ impl PoolInner {
751763

752764
async fn configure_fdw(&self, servers: &[ForeignServer]) -> Result<(), StoreError> {
753765
info!(&self.logger, "Setting up fdw");
754-
let mut conn = self.get().await?;
766+
let mut conn = self.get_for_setup().await?;
755767
conn.batch_execute("create extension if not exists postgres_fdw")
756768
.await?;
757769
conn.transaction(|conn| {
@@ -790,7 +802,10 @@ impl PoolInner {
790802
// careful that block_on only gets called on a blocking thread to
791803
// avoid errors from the tokio runtime
792804
let logger = self.logger.cheap_clone();
793-
let mut conn = self.get().await.map(AsyncConnectionWrapper::from)?;
805+
let mut conn = self
806+
.get_for_setup()
807+
.await
808+
.map(AsyncConnectionWrapper::from)?;
794809

795810
tokio::task::spawn_blocking(move || {
796811
diesel::Connection::transaction::<_, StoreError, _>(&mut conn, |conn| {
@@ -808,7 +823,7 @@ impl PoolInner {
808823
}
809824

810825
info!(&self.logger, "Dropping cross-shard views");
811-
let mut conn = self.get().await?;
826+
let mut conn = self.get_for_setup().await?;
812827
conn.transaction(|conn| {
813828
async {
814829
let query = format!("drop schema if exists {} cascade", CROSS_SHARD_NSP);
@@ -845,7 +860,7 @@ impl PoolInner {
845860
return Ok(());
846861
}
847862

848-
let mut conn = self.get().await?;
863+
let mut conn = self.get_for_setup().await?;
849864
let sharded = Namespace::special(CROSS_SHARD_NSP);
850865
if catalog::has_namespace(&mut conn, &sharded).await? {
851866
// We dropped the namespace before, but another node must have
@@ -897,7 +912,7 @@ impl PoolInner {
897912
pub async fn remap(&self, server: &ForeignServer) -> Result<(), StoreError> {
898913
if server.shard == *PRIMARY_SHARD {
899914
info!(&self.logger, "Mapping primary");
900-
let mut conn = self.get().await?;
915+
let mut conn = self.get_for_setup().await?;
901916
conn.transaction(|conn| ForeignServer::map_primary(conn, &self.shard).scope_boxed())
902917
.await?;
903918
}
@@ -907,7 +922,7 @@ impl PoolInner {
907922
"Mapping metadata from {}",
908923
server.shard.as_str()
909924
);
910-
let mut conn = self.get().await?;
925+
let mut conn = self.get_for_setup().await?;
911926
conn.transaction(|conn| server.map_metadata(conn).scope_boxed())
912927
.await?;
913928
}
@@ -919,7 +934,7 @@ impl PoolInner {
919934
return Ok(false);
920935
}
921936

922-
let mut conn = self.get().await?;
937+
let mut conn = self.get_for_setup().await?;
923938
server.needs_remap(&mut conn).await
924939
}
925940
}

0 commit comments

Comments
 (0)