Skip to content

Commit b93303e

Browse files
authored
Live reloading entire config and bug fixes (#84)
* Support reloading the entire config (including sharding logic) without a restart. * Fix a bug in error reporting when the shard is set incorrectly via the SET SHARD TO command: the wrong shard was selected and the connection kept reporting a fatal error (#80). * Fix the total_received and avg_recv admin database statistics. * Enable the query parser by default. * Add more tests.
1 parent d865d9f commit b93303e

File tree

14 files changed

+393
-188
lines changed

14 files changed

+393
-188
lines changed

.circleci/pgcat.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ default_role = "any"
8787
# every incoming query to determine if it's a read or a write.
8888
# If it's a read query, we'll direct it to a replica. Otherwise, if it's a write,
8989
# we'll direct it to the primary.
90-
query_parser_enabled = false
90+
query_parser_enabled = true
9191

9292
# If the query parser is enabled and this setting is enabled, the primary will be part of the pool of databases used for
9393
# load balancing of read queries. Otherwise, the primary will only be used for write

.circleci/run_tests.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,15 @@ pgbench -U sharding_user -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol extended
4242
psql -U sharding_user -h 127.0.0.1 -p 6432 -c 'COPY (SELECT * FROM pgbench_accounts LIMIT 15) TO STDOUT;' > /dev/null
4343

4444
# Query cancellation test
45-
(psql -U sharding_user -h 127.0.0.1 -p 6432 -c 'SELECT pg_sleep(5)' || true) &
45+
(psql -U sharding_user -h 127.0.0.1 -p 6432 -c 'SELECT pg_sleep(50)' || true) &
46+
sleep 1
47+
killall psql -s SIGINT
48+
49+
# Reload pool (closing unused server connections)
50+
psql -U sharding_user -h 127.0.0.1 -p 6432 -d pgbouncer -c 'RELOAD'
51+
52+
(psql -U sharding_user -h 127.0.0.1 -p 6432 -c 'SELECT pg_sleep(50)' || true) &
53+
sleep 1
4654
killall psql -s SIGINT
4755

4856
# Sharding insert
@@ -94,7 +102,7 @@ toxiproxy-cli toxic remove --toxicName latency_downstream postgres_replica
94102
start_pgcat "info"
95103

96104
# Test session mode (and config reload)
97-
sed -i 's/pool_mode = "transaction"/pool_mode = "session"/' pgcat.toml
105+
sed -i 's/pool_mode = "transaction"/pool_mode = "session"/' .circleci/pgcat.toml
98106

99107
# Reload config test
100108
kill -SIGHUP $(pgrep pgcat)

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pgcat"
3-
version = "0.2.1-beta1"
3+
version = "0.4.0-beta1"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -28,4 +28,4 @@ parking_lot = "0.11"
2828
hmac = "0.12"
2929
sha2 = "0.10"
3030
base64 = "0.13"
31-
stringprep = "0.1"
31+
stringprep = "0.1"

pgcat.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ default_role = "any"
8787
# every incoming query to determine if it's a read or a write.
8888
# If it's a read query, we'll direct it to a replica. Otherwise, if it's a write,
8989
# we'll direct it to the primary.
90-
query_parser_enabled = false
90+
query_parser_enabled = true
9191

9292
# If the query parser is enabled and this setting is enabled, the primary will be part of the pool of databases used for
9393
# load balancing of read queries. Otherwise, the primary will only be used for write

src/admin.rs

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,19 @@ use log::{info, trace};
44
use std::collections::HashMap;
55
use tokio::net::tcp::OwnedWriteHalf;
66

7-
use crate::config::{get_config, parse};
7+
use crate::config::{get_config, reload_config};
88
use crate::errors::Error;
99
use crate::messages::*;
1010
use crate::pool::ConnectionPool;
1111
use crate::stats::get_stats;
12+
use crate::ClientServerMap;
1213

1314
/// Handle admin client.
1415
pub async fn handle_admin(
1516
stream: &mut OwnedWriteHalf,
1617
mut query: BytesMut,
1718
pool: ConnectionPool,
19+
client_server_map: ClientServerMap,
1820
) -> Result<(), Error> {
1921
let code = query.get_u8() as char;
2022

@@ -34,7 +36,7 @@ pub async fn handle_admin(
3436
show_stats(stream, &pool).await
3537
} else if query.starts_with("RELOAD") {
3638
trace!("RELOAD");
37-
reload(stream).await
39+
reload(stream, client_server_map).await
3840
} else if query.starts_with("SHOW CONFIG") {
3941
trace!("SHOW CONFIG");
4042
show_config(stream).await
@@ -143,10 +145,7 @@ async fn show_version(stream: &mut OwnedWriteHalf) -> Result<(), Error> {
143145
/// Show utilization of connection pools for each shard and replicas.
144146
async fn show_pools(stream: &mut OwnedWriteHalf, pool: &ConnectionPool) -> Result<(), Error> {
145147
let stats = get_stats();
146-
let config = {
147-
let guard = get_config();
148-
&*guard.clone()
149-
};
148+
let config = get_config();
150149

151150
let columns = vec![
152151
("database", DataType::Text),
@@ -199,9 +198,7 @@ async fn show_pools(stream: &mut OwnedWriteHalf, pool: &ConnectionPool) -> Resul
199198

200199
/// Show shards and replicas.
201200
async fn show_databases(stream: &mut OwnedWriteHalf, pool: &ConnectionPool) -> Result<(), Error> {
202-
let guard = get_config();
203-
let config = &*guard.clone();
204-
drop(guard);
201+
let config = get_config();
205202

206203
// Columns
207204
let columns = vec![
@@ -266,17 +263,15 @@ async fn ignore_set(stream: &mut OwnedWriteHalf) -> Result<(), Error> {
266263
}
267264

268265
/// Reload the configuration file without restarting the process.
269-
async fn reload(stream: &mut OwnedWriteHalf) -> Result<(), Error> {
266+
async fn reload(
267+
stream: &mut OwnedWriteHalf,
268+
client_server_map: ClientServerMap,
269+
) -> Result<(), Error> {
270270
info!("Reloading config");
271271

272-
let config = get_config();
273-
let path = config.path.clone().unwrap();
274-
275-
parse(&path).await?;
276-
277-
let config = get_config();
272+
reload_config(client_server_map).await?;
278273

279-
config.show();
274+
get_config().show();
280275

281276
let mut res = BytesMut::new();
282277

@@ -292,10 +287,8 @@ async fn reload(stream: &mut OwnedWriteHalf) -> Result<(), Error> {
292287

293288
/// Shows current configuration.
294289
async fn show_config(stream: &mut OwnedWriteHalf) -> Result<(), Error> {
295-
let guard = get_config();
296-
let config = &*guard.clone();
290+
let config = &get_config();
297291
let config: HashMap<String, String> = config.into();
298-
drop(guard);
299292

300293
// Configs that cannot be changed without restarting.
301294
let immutables = ["host", "port", "connect_timeout"];

src/client.rs

Lines changed: 71 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ use crate::config::get_config;
1313
use crate::constants::*;
1414
use crate::errors::Error;
1515
use crate::messages::*;
16-
use crate::pool::{ClientServerMap, ConnectionPool};
16+
use crate::pool::{get_pool, ClientServerMap};
1717
use crate::query_router::{Command, QueryRouter};
1818
use crate::server::Server;
19-
use crate::stats::Reporter;
19+
use crate::stats::{get_reporter, Reporter};
2020

2121
/// The client state. One of these is created per client.
2222
pub struct Client {
@@ -69,12 +69,11 @@ impl Client {
6969
pub async fn startup(
7070
mut stream: TcpStream,
7171
client_server_map: ClientServerMap,
72-
server_info: BytesMut,
73-
stats: Reporter,
7472
) -> Result<Client, Error> {
75-
let config = get_config().clone();
76-
let transaction_mode = config.general.pool_mode.starts_with("t");
77-
// drop(config);
73+
let config = get_config();
74+
let transaction_mode = config.general.pool_mode == "transaction";
75+
let stats = get_reporter();
76+
7877
loop {
7978
trace!("Waiting for StartupMessage");
8079

@@ -154,9 +153,10 @@ impl Client {
154153
debug!("Password authentication successful");
155154

156155
auth_ok(&mut stream).await?;
157-
write_all(&mut stream, server_info).await?;
156+
write_all(&mut stream, get_pool().server_info()).await?;
158157
backend_key_data(&mut stream, process_id, secret_key).await?;
159158
ready_for_query(&mut stream).await?;
159+
160160
trace!("Startup OK");
161161

162162
let database = parameters
@@ -221,7 +221,7 @@ impl Client {
221221
}
222222

223223
/// Handle a connected and authenticated client.
224-
pub async fn handle(&mut self, mut pool: ConnectionPool) -> Result<(), Error> {
224+
pub async fn handle(&mut self) -> Result<(), Error> {
225225
// The client wants to cancel a query it has issued previously.
226226
if self.cancel_mode {
227227
trace!("Sending CancelRequest");
@@ -252,13 +252,19 @@ impl Client {
252252
return Ok(Server::cancel(&address, &port, process_id, secret_key).await?);
253253
}
254254

255+
// The query router determines where the query is going to go,
256+
// e.g. primary, replica, which shard.
255257
let mut query_router = QueryRouter::new();
258+
let mut round_robin = 0;
256259

257260
// Our custom protocol loop.
258261
// We expect the client to either start a transaction with regular queries
259262
// or issue commands for our sharding and server selection protocol.
260263
loop {
261-
trace!("Client idle, waiting for message");
264+
trace!(
265+
"Client idle, waiting for message, transaction mode: {}",
266+
self.transaction_mode
267+
);
262268

263269
// Read a complete message from the client, which normally would be
264270
// either a `Q` (query) or `P` (prepare, extended protocol).
@@ -267,32 +273,63 @@ impl Client {
267273
// SET SHARDING KEY TO 'bigint';
268274
let mut message = read_message(&mut self.read).await?;
269275

276+
// Get a pool instance referenced by the most up-to-date
277+
// pointer. This ensures we always read the latest config
278+
// when starting a query.
279+
let mut pool = get_pool();
280+
270281
// Avoid taking a server if the client just wants to disconnect.
271282
if message[0] as char == 'X' {
272-
trace!("Client disconnecting");
283+
debug!("Client disconnecting");
273284
return Ok(());
274285
}
275286

276287
// Handle admin database queries.
277288
if self.admin {
278-
trace!("Handling admin command");
279-
handle_admin(&mut self.write, message, pool.clone()).await?;
289+
debug!("Handling admin command");
290+
handle_admin(
291+
&mut self.write,
292+
message,
293+
pool.clone(),
294+
self.client_server_map.clone(),
295+
)
296+
.await?;
280297
continue;
281298
}
282299

300+
let current_shard = query_router.shard();
301+
283302
// Handle all custom protocol commands, if any.
284303
match query_router.try_execute_command(message.clone()) {
285304
// Normal query, not a custom command.
286-
None => {
287-
// Attempt to infer which server we want to query, i.e. primary or replica.
288-
if query_router.query_parser_enabled() && query_router.role() == None {
289-
query_router.infer_role(message.clone());
290-
}
291-
}
305+
None => (),
292306

293307
// SET SHARD TO
294308
Some((Command::SetShard, _)) => {
295-
custom_protocol_response_ok(&mut self.write, "SET SHARD").await?;
309+
// Selected shard is not configured.
310+
if query_router.shard() >= pool.shards() {
311+
// Set the shard back to what it was.
312+
query_router.set_shard(current_shard);
313+
314+
error_response(
315+
&mut self.write,
316+
&format!(
317+
"shard {} is more than configured {}, staying on shard {}",
318+
query_router.shard(),
319+
pool.shards(),
320+
current_shard,
321+
),
322+
)
323+
.await?;
324+
} else {
325+
custom_protocol_response_ok(&mut self.write, "SET SHARD").await?;
326+
}
327+
continue;
328+
}
329+
330+
// SET PRIMARY READS TO
331+
Some((Command::SetPrimaryReads, _)) => {
332+
custom_protocol_response_ok(&mut self.write, "SET PRIMARY READS").await?;
296333
continue;
297334
}
298335

@@ -319,27 +356,24 @@ impl Client {
319356
show_response(&mut self.write, "shard", &value).await?;
320357
continue;
321358
}
322-
};
323359

324-
// Make sure we selected a valid shard.
325-
if query_router.shard() >= pool.shards() {
326-
error_response(
327-
&mut self.write,
328-
&format!(
329-
"shard {} is more than configured {}",
330-
query_router.shard(),
331-
pool.shards()
332-
),
333-
)
334-
.await?;
335-
continue;
336-
}
360+
// SHOW PRIMARY READS
361+
Some((Command::ShowPrimaryReads, value)) => {
362+
show_response(&mut self.write, "primary reads", &value).await?;
363+
continue;
364+
}
365+
};
337366

338367
debug!("Waiting for connection from pool");
339368

340369
// Grab a server from the pool.
341370
let connection = match pool
342-
.get(query_router.shard(), query_router.role(), self.process_id)
371+
.get(
372+
query_router.shard(),
373+
query_router.role(),
374+
self.process_id,
375+
round_robin,
376+
)
343377
.await
344378
{
345379
Ok(conn) => {
@@ -358,6 +392,8 @@ impl Client {
358392
let address = connection.1;
359393
let server = &mut *reference;
360394

395+
round_robin += 1;
396+
361397
// Server is assigned to the client in case the client wants to
362398
// cancel a query later.
363399
server.claim(self.process_id, self.secret_key);

0 commit comments

Comments
 (0)