Skip to content

Commit 0846a17

Browse files
drdrshdat2zainkabanilevkkchhetripradeep
authored
Sync with upstream (postgresml#26)
* Prevent clients from sticking to old pools after config update (postgresml#113) * Re-acquire pool at the beginning of Protocol loop * Fix query router + add tests for recycling behavior * create a prometheus exporter on a standard http port (postgresml#107) * create a hyper server and add option to enable it in config * move prometheus stuff to its own file; update format * create metric type and help lookup table * finish the metric help type map * switch to a boolean and a standard port * dont emit unimplemented metrics * fail if curl returns a non 200 * resolve conflicts * move log out of config.show and into main * terminating new line * upgrade curl * include unimplemented stats * Validates pgcat is closed after shutdown python tests (postgresml#116) * Validates pgcat is closed after shutdown python tests * Fix pgrep logic * Moves sigterm step to after cleanup to decouple * Replace subprocess with os.system for running pgcat * fix docker compose port allocation for local dev (postgresml#117) change docker compose port to right prometheus port * Update CONTRIBUTING.md * Health check delay (postgresml#118) * initial commit of server check delay implementation * fmt * spelling * Update name to last_healthcheck and some comments * Moved server tested stat to after require_healthcheck check * Make health check delay configurable * Rename to last_activity * Fix typo * Add debug log for healthcheck * Add address to debug log * Speed up CI a bit (postgresml#119) * Sleep for 1s * use premade image * quicker * revert shutdown timeout * Fix debug log (postgresml#120) * Make prometheus port configurable (postgresml#121) * Make prometheus port configurable * Update circleci config * Statement timeout + replica imbalance fix (postgresml#122) * Statement timeout * send error message too * Correct error messages * Fix replica inbalance * disable stmt timeout by default * Redundant mark_bad * revert healthcheck delay * tests * set it to 0 * reload config again * pgcat toml 
Co-authored-by: Nicholas Dujay <3258756+dat2@users.noreply.github.com> Co-authored-by: zainkabani <77307340+zainkabani@users.noreply.github.com> Co-authored-by: Lev Kokotov <levkk@users.noreply.github.com> Co-authored-by: Pradeep Chhetri <30620077+chhetripradeep@users.noreply.github.com>
1 parent dfe0e05 commit 0846a17

17 files changed

+310
-81
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
# Specify the execution environment. You can specify an image from Dockerhub or use one of our Convenience Images from CircleCI's Developer Hub.
1010
# See: https://circleci.com/docs/2.0/configuration-reference/#docker-machine-macos-windows-executor
1111
docker:
12-
- image: cimg/rust:1.62.0
12+
- image: levkk/pgcat-ci:latest
1313
environment:
1414
RUST_LOG: info
1515
RUSTFLAGS: "-C instrument-coverage"

.circleci/pgcat.toml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,21 @@ host = "0.0.0.0"
1111
# Port to run on, same as PgBouncer used in this example.
1212
port = 6432
1313

14-
# enable prometheus exporter on port 9930
14+
# Whether to enable prometheus exporter or not.
1515
enable_prometheus_exporter = true
1616

17+
# Port on which the prometheus exporter listens.
18+
prometheus_exporter_port = 9930
19+
1720
# How long to wait before aborting a server connection (ms).
1821
connect_timeout = 100
1922

2023
# How much time to give the health check query to return with a result (ms).
2124
healthcheck_timeout = 100
2225

26+
# How long to keep a connection available for immediate re-use without running a health check query on it (ms).
27+
healthcheck_delay = 30000
28+
2329
# How much time to give clients during shutdown before forcibly killing client connections (ms).
2430
shutdown_timeout = 5000
2531

@@ -85,11 +91,13 @@ password = "sharding_user"
8591
# The maximum number of connection from a single Pgcat process to any database in the cluster
8692
# is the sum of pool_size across all users.
8793
pool_size = 9
94+
statement_timeout = 0
8895

8996
[pools.sharded_db.users.1]
9097
username = "other_user"
9198
password = "other_user"
9299
pool_size = 21
100+
statement_timeout = 30000
93101

94102
# Shard 0
95103
[pools.sharded_db.shards.0]
@@ -127,6 +135,7 @@ sharding_function = "pg_bigint_hash"
127135
username = "simple_user"
128136
password = "simple_user"
129137
pool_size = 5
138+
statement_timeout = 30000
130139

131140
[pools.simple_db.shards.0]
132141
servers = [

.circleci/run_tests.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,18 @@ psql -U sharding_user -e -h 127.0.0.1 -p 6432 -f tests/sharding/query_routing_te
6666
# Replica/primary selection & more sharding tests
6767
psql -U sharding_user -e -h 127.0.0.1 -p 6432 -f tests/sharding/query_routing_test_primary_replica.sql > /dev/null
6868

69+
# Statement timeout tests
70+
sed -i 's/statement_timeout = 0/statement_timeout = 100/' .circleci/pgcat.toml
71+
kill -SIGHUP $(pgrep pgcat) # Reload config
72+
sleep 0.2
73+
74+
# This should timeout
75+
(! psql -U sharding_user -e -h 127.0.0.1 -p 6432 -c 'select pg_sleep(0.5)')
76+
77+
# Disable statement timeout
78+
sed -i 's/statement_timeout = 100/statement_timeout = 0/' .circleci/pgcat.toml
79+
kill -SIGHUP $(pgrep pgcat) # Reload config again
80+
6981
#
7082
# ActiveRecord tests
7183
#
@@ -122,6 +134,8 @@ sed -i 's/pool_mode = "transaction"/pool_mode = "session"/' .circleci/pgcat.toml
122134
# Reload config test
123135
kill -SIGHUP $(pgrep pgcat)
124136

137+
sleep 1
138+
125139
# Prepared statements that will only work in session mode
126140
pgbench -U sharding_user -h 127.0.0.1 -p 6432 -t 500 -c 2 --protocol prepared
127141

CONTRIBUTING.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,4 @@ Happy hacking!
1010

1111
## TODOs
1212

13-
A non-exhaustive list of things that would be useful to implement:
14-
15-
#### Client authentication
16-
MD5 is probably sufficient, but maybe others too.
17-
18-
#### Admin
19-
Admin database for stats collection and pooler administration. PgBouncer gives us a nice example on how to do that, specifically how to implement `RowDescription` and `DataRow` messages, [example here](https://github.com/pgbouncer/pgbouncer/blob/4f9ced8e63d317a6ff45c8b0efa876b32161f6db/src/admin.c#L813).
13+
See [Issues](https://github.com/levkk/pgcat/issues).

Dockerfile.ci

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FROM cimg/rust:1.62.0
2+
RUN sudo apt-get update && \
3+
sudo apt-get install -y psmisc postgresql-contrib-12 postgresql-client-12 ruby ruby-dev libpq-dev python3 python3-pip lcov llvm-11 && \
4+
sudo apt-get upgrade curl
5+
RUN cargo install cargo-binutils rustfilt && \
6+
rustup component add llvm-tools-preview
7+
RUN pip3 install psycopg2 && \
8+
sudo gem install bundler

README.md

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -47,29 +47,34 @@ psql -h 127.0.0.1 -p 6432 -c 'SELECT 1'
4747

4848
### Config
4949

50-
| **Name** | **Description** | **Examples** |
51-
|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------|
52-
| **`general`** | | |
53-
| `host` | The pooler will run on this host, 0.0.0.0 means accessible from everywhere. | `0.0.0.0` |
54-
| `port` | The pooler will run on this port. | `6432` |
55-
| `pool_size` | Maximum allowed server connections per pool. Pools are separated for each user/shard/server role. The connections are allocated as needed. | `15` |
56-
| `pool_mode` | The pool mode to use, i.e. `session` or `transaction`. | `transaction` |
57-
| `connect_timeout` | Maximum time to establish a connection to a server (milliseconds). If reached, the server is banned and the next target is attempted. | `5000` |
58-
| `healthcheck_timeout` | Maximum time to pass a health check (`SELECT 1`, milliseconds). If reached, the server is banned and the next target is attempted. | `1000` |
59-
| `shutdown_timeout` | Maximum time to give clients during shutdown before forcibly killing client connections (ms). | `60000` |
60-
| `ban_time` | Ban time for a server (seconds). It won't be allowed to serve transactions until the ban expires; failover targets will be used instead. | `60` |
61-
| | | |
62-
| **`user`** | | |
63-
| `name` | The user name. | `sharding_user` |
64-
| `password` | The user password in plaintext. | `hunter2` |
65-
| | | |
66-
| **`shards`** | Shards are numerically numbered starting from 0; the order in the config is preserved by the pooler to route queries accordingly. | `[shards.0]` |
67-
| `servers` | List of servers to connect to and their roles. A server is: `[host, port, role]`, where `role` is either `primary` or `replica`. | `["127.0.0.1", 5432, "primary"]` |
68-
| `database` | The name of the database to connect to. This is the same on all servers that are part of one shard. | |
69-
| **`query_router`** | | |
70-
| `default_role` | Traffic is routed to this role by default (round-robin), unless the client specifies otherwise. Default is `any`, for any role available. | `any`, `primary`, `replica` |
71-
| `query_parser_enabled` | Enable the query parser which will inspect incoming queries and route them to a primary or replicas. | `false` |
72-
| `primary_reads_enabled` | Enable this to allow read queries on the primary; otherwise read queries are routed to the replicas. | `true` |
50+
| **Name** | **Description** | **Examples** |
51+
|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------|
52+
| **`general`** | | |
53+
| `host` | The pooler will run on this host, 0.0.0.0 means accessible from everywhere. | `0.0.0.0` |
54+
| `port` | The pooler will run on this port. | `6432` |
55+
| `enable_prometheus_exporter` | Enable prometheus exporter which will export metrics in prometheus exposition format. | `true` |
56+
| `prometheus_exporter_port` | Port at which prometheus exporter listens on. | `9930` |
57+
| `pool_size` | Maximum allowed server connections per pool. Pools are separated for each user/shard/server role. The connections are allocated as needed. | `15` |
58+
| `pool_mode` | The pool mode to use, i.e. `session` or `transaction`. | `transaction` |
59+
| `connect_timeout` | Maximum time to establish a connection to a server (milliseconds). If reached, the server is banned and the next target is attempted. | `5000` |
60+
| `healthcheck_timeout` | Maximum time to pass a health check (`SELECT 1`, milliseconds). If reached, the server is banned and the next target is attempted. | `1000` |
61+
| `shutdown_timeout` | Maximum time to give clients during shutdown before forcibly killing client connections (ms). | `60000` |
62+
| `healthcheck_delay` | How long to keep connection available for immediate re-use, without running a healthcheck query on it | `30000` |
63+
| `ban_time` | Ban time for a server (seconds). It won't be allowed to serve transactions until the ban expires; failover targets will be used instead. | `60` |
64+
| `autoreload` | Enable auto-reload of config after fixed time-interval. | `false` |
65+
| | | |
66+
| **`user`** | | |
67+
| `name` | The user name. | `sharding_user` |
68+
| `password` | The user password in plaintext. | `hunter2` |
69+
| | | |
70+
| **`shards`** | Shards are numerically numbered starting from 0; the order in the config is preserved by the pooler to route queries accordingly. | `[shards.0]` |
71+
| `servers` | List of servers to connect to and their roles. A server is: `[host, port, role]`, where `role` is either `primary` or `replica`. | `["127.0.0.1", 5432, "primary"]` |
72+
| `database` | The name of the database to connect to. This is the same on all servers that are part of one shard. | |
73+
| | | |
74+
| **`query_router`** | | |
75+
| `default_role` | Traffic is routed to this role by default (round-robin), unless the client specifies otherwise. Default is `any`, for any role available. | `any`, `primary`, `replica` |
76+
| `query_parser_enabled` | Enable the query parser which will inspect incoming queries and route them to a primary or replicas. | `false` |
77+
| `primary_reads_enabled` | Enable this to allow read queries on the primary; otherwise read queries are routed to the replicas. | `true` |
7378

7479
## Local development
7580

@@ -261,6 +266,7 @@ The config can be reloaded by sending a `kill -s SIGHUP` to the process or by qu
261266
| `connect_timeout` | yes |
262267
| `healthcheck_timeout` | no |
263268
| `shutdown_timeout` | no |
269+
| `healthcheck_delay` | no |
264270
| `ban_time` | no |
265271
| `user` | yes |
266272
| `shards` | yes |

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
version: "3"
22
services:
33
postgres:
4-
image: postgres:13
4+
image: postgres:14
55
environment:
66
POSTGRES_PASSWORD: postgres
77
POSTGRES_HOST_AUTH_METHOD: md5

examples/docker/pgcat.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,21 @@ host = "0.0.0.0"
1111
# Port to run on, same as PgBouncer used in this example.
1212
port = 6432
1313

14-
# enable prometheus exporter on port 9930
14+
# Whether to enable prometheus exporter or not.
1515
enable_prometheus_exporter = true
1616

17+
# Port on which the prometheus exporter listens.
18+
prometheus_exporter_port = 9930
19+
1720
# How long to wait before aborting a server connection (ms).
1821
connect_timeout = 5000
1922

2023
# How much time to give `SELECT 1` health check query to return with a result (ms).
2124
healthcheck_timeout = 1000
2225

26+
# How long to keep a connection available for immediate re-use without running a health check query on it (ms).
27+
healthcheck_delay = 30000
28+
2329
# How much time to give clients during shutdown before forcibly killing client connections (ms).
2430
shutdown_timeout = 60000
2531

pgcat.toml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,21 @@ host = "0.0.0.0"
1111
# Port to run on, same as PgBouncer used in this example.
1212
port = 6432
1313

14-
# enable prometheus exporter on port 9930
14+
# Whether to enable prometheus exporter or not.
1515
enable_prometheus_exporter = true
1616

17+
# Port on which the prometheus exporter listens.
18+
prometheus_exporter_port = 9930
19+
1720
# How long to wait before aborting a server connection (ms).
1821
connect_timeout = 5000
1922

2023
# How much time to give the health check query to return with a result (ms).
2124
healthcheck_timeout = 1000
2225

26+
# How long to keep a connection available for immediate re-use without running a health check query on it (ms).
27+
healthcheck_delay = 30000
28+
2329
# How much time to give clients during shutdown before forcibly killing client connections (ms).
2430
shutdown_timeout = 60000
2531

@@ -86,10 +92,14 @@ password = "sharding_user"
8692
# is the sum of pool_size across all users.
8793
pool_size = 9
8894

95+
# Maximum query duration. Dangerous, but protects against DBs that died in a non-obvious way.
96+
statement_timeout = 0
97+
8998
[pools.sharded_db.users.1]
9099
username = "other_user"
91100
password = "other_user"
92101
pool_size = 21
102+
statement_timeout = 15000
93103

94104
# Shard 0
95105
[pools.sharded_db.shards.0]
@@ -127,6 +137,7 @@ sharding_function = "pg_bigint_hash"
127137
username = "simple_user"
128138
password = "simple_user"
129139
pool_size = 5
140+
statement_timeout = 0
130141

131142
[pools.simple_db.shards.0]
132143
servers = [

0 commit comments

Comments
 (0)