Skip to content

Commit a45cd1b

Browse files
authored
Better USDT probes for ClickHouse protocol (#8622)
This adds more observability into the network protocol used to talk to ClickHouse, in an attempt to help resolve #8595
1 parent abd5ed0 commit a45cd1b

File tree

12 files changed

+199
-137
lines changed

12 files changed

+199
-137
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ cargo_metadata = "0.20.0"
382382
chacha20poly1305 = "0.10.1"
383383
cfg-if = "1.0"
384384
chrono = { version = "0.4", features = [ "serde" ] }
385-
chrono-tz = "0.10.3"
385+
chrono-tz = { version = "0.10.3", features = [ "serde" ] }
386386
ciborium = "0.2.2"
387387
clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] }
388388
clickana = { path = "dev-tools/clickana" }

oximeter/db/benches/protocol.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@ async fn native_impl(
3737
conn: &Arc<Mutex<Connection>>,
3838
query: &str,
3939
) {
40-
conn.lock().await.query(query).await.expect("Expected to run query");
40+
conn.lock()
41+
.await
42+
.query(uuid::Uuid::new_v4(), query)
43+
.await
44+
.expect("Expected to run query");
4145
}
4246

4347
/// Setup the native query benchmark.

oximeter/db/src/client/mod.rs

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
//! Rust client to ClickHouse database
66
7-
// Copyright 2024 Oxide Computer Company
7+
// Copyright 2025 Oxide Computer Company
88

99
pub(crate) mod dbwrite;
1010
#[cfg(any(feature = "oxql", test))]
@@ -78,10 +78,16 @@ const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(30);
7878
#[usdt::provider(provider = "clickhouse_client")]
7979
mod probes {
8080
/// Fires when a SQL query begins, with the query string.
81-
fn sql__query__start(_: &usdt::UniqueId, sql: &str) {}
81+
///
82+
/// The first argument is a UUID assigned to the query. This is sent to the
83+
/// ClickHouse server along with the query string, where it's used as the
84+
/// query ID in tables like the query log. This can be used to correlate
85+
/// behavior on the client side with how the server itself processes the
86+
/// query.
87+
fn sql__query__start(query_id: &str, sql: &str) {}
8288

8389
/// Fires when a SQL query ends, either in success or failure.
84-
fn sql__query__done(_: &usdt::UniqueId) {}
90+
fn sql__query__done(query_id: &str) {}
8591
}
8692

8793
/// A `Client` to the ClickHouse metrics database.
@@ -1124,16 +1130,16 @@ impl Client {
11241130
"n_rows" => block.n_rows(),
11251131
"n_columns" => block.n_columns(),
11261132
);
1127-
let id = usdt::UniqueId::new();
1128-
probes::sql__query__start!(|| (&id, sql));
1133+
let id = Uuid::new_v4();
1134+
probes::sql__query__start!(|| (id.to_string(), sql));
11291135
let now = tokio::time::Instant::now();
11301136
let result = tokio::time::timeout(
11311137
self.request_timeout,
1132-
handle.insert(sql, block),
1138+
handle.insert(id, sql, block),
11331139
)
11341140
.await;
11351141
let elapsed = now.elapsed();
1136-
probes::sql__query__done!(|| (&id));
1142+
probes::sql__query__done!(|| id.to_string());
11371143
match result {
11381144
Ok(result) => result.map_err(Error::from),
11391145
Err(_) => Err(Error::DatabaseUnavailable(format!(
@@ -1166,13 +1172,14 @@ impl Client {
11661172
"sql" => sql,
11671173
);
11681174

1169-
let id = usdt::UniqueId::new();
1170-
probes::sql__query__start!(|| (&id, sql));
1175+
let id = Uuid::new_v4();
1176+
probes::sql__query__start!(|| (id.to_string(), sql));
11711177
let now = tokio::time::Instant::now();
11721178
let result =
1173-
tokio::time::timeout(self.request_timeout, handle.query(sql)).await;
1179+
tokio::time::timeout(self.request_timeout, handle.query(id, sql))
1180+
.await;
11741181
let elapsed = now.elapsed();
1175-
probes::sql__query__done!(|| (&id));
1182+
probes::sql__query__done!(|| id.to_string());
11761183
match result {
11771184
Ok(result) => result.map_err(Error::from),
11781185
Err(_) => Err(Error::DatabaseUnavailable(format!(

oximeter/db/src/native/block.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44
//
5-
// Copyright 2024 Oxide Computer Company
5+
// Copyright 2025 Oxide Computer Company
66

77
//! Types for working with actual blocks and columns of data.
88
@@ -45,7 +45,7 @@ use uuid::Uuid;
4545
/// results of a query. It is not necessarily the entire set of data -- most
4646
/// queries result in sending more than one block. But every query (and some of
4747
/// the management or status commands) sends data as one or more block.
48-
#[derive(Clone, Debug, PartialEq)]
48+
#[derive(Clone, Debug, PartialEq, serde::Serialize)]
4949
pub struct Block {
5050
/// A name for the block.
5151
///
@@ -282,7 +282,7 @@ impl Block {
282282
///
283283
/// This is only used for a few special kinds of queries. See the fields for
284284
/// details.
285-
#[derive(Clone, Copy, Debug, PartialEq)]
285+
#[derive(Clone, Copy, Debug, PartialEq, serde::Serialize)]
286286
pub struct BlockInfo {
287287
/// True if this is an "overflow" block, which is the case if:
288288
///
@@ -325,7 +325,7 @@ impl Default for BlockInfo {
325325
///
326326
/// This represents a single column of data fetched in a query or sent in an
327327
/// insert. It includes the type implicitly, in the value array it contains.
328-
#[derive(Clone, Debug, PartialEq)]
328+
#[derive(Clone, Debug, PartialEq, serde::Serialize)]
329329
pub struct Column {
330330
/// The data values for this column.
331331
pub values: ValueArray,
@@ -370,7 +370,7 @@ impl Column {
370370
}
371371

372372
/// An array of singly-typed data values from the server.
373-
#[derive(Clone, Debug, PartialEq)]
373+
#[derive(Clone, Debug, PartialEq, serde::Serialize)]
374374
pub enum ValueArray {
375375
Bool(Vec<bool>),
376376
UInt8(Vec<u8>),
@@ -689,7 +689,8 @@ impl From<Vec<IpAddr>> for ValueArray {
689689
}
690690
}
691691

692-
#[derive(Clone, Copy, Debug, PartialEq)]
692+
#[derive(Clone, Copy, Debug, PartialEq, serde::Serialize)]
693+
#[serde(transparent)]
693694
pub struct Precision(u8);
694695

695696
impl TryFrom<u8> for Precision {
@@ -792,7 +793,7 @@ impl fmt::Display for Precision {
792793
}
793794

794795
/// A type of a column of data.
795-
#[derive(Clone, Debug, PartialEq)]
796+
#[derive(Clone, Debug, PartialEq, serde::Serialize)]
796797
pub enum DataType {
797798
Bool,
798799
UInt8,

0 commit comments

Comments
 (0)