Skip to content

Commit 8692c8c

Browse files
committed
Introduce telemetry for observability
This introduces the foundational telemetry infrastructure to improve the observability of LDK Server. It adds a new `/metrics` endpoint, exposed on the REST service address, which serves Prometheus-compatible metrics. This endpoint is public and does not require HMAC authentication, allowing for easy integration with monitoring systems.
- Added the `prometheus` dependency and a `Metrics` utility struct.
- Introduced a basic `ldk_health_score` gauge (0-100) that reflects the node's operational status based on peer connectivity, sync state, and running status.
This is the first step in a larger effort to provide comprehensive telemetry. Future updates will expand this to include metrics for channels, balances, payments, and other critical node activities.
1 parent 2766533 commit 8692c8c

File tree

10 files changed

+270
-9
lines changed

10 files changed

+270
-9
lines changed

Cargo.lock

Lines changed: 63 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ldk-server-protos/src/endpoints.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ pub const LIST_FORWARDED_PAYMENTS_PATH: &str = "ListForwardedPayments";
2626
pub const UPDATE_CHANNEL_CONFIG_PATH: &str = "UpdateChannelConfig";
2727
pub const GET_PAYMENT_DETAILS_PATH: &str = "GetPaymentDetails";
2828
pub const CONNECT_PEER_PATH: &str = "ConnectPeer";
29+
/// Request path (without the leading `/`) of the metrics endpoint.
pub const GET_METRICS_PATH: &str = "metrics";

ldk-server/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ toml = { version = "0.8.9", default-features = false, features = ["parse"] }
2323
chrono = { version = "0.4", default-features = false, features = ["clock"] }
2424
log = "0.4.28"
2525
base64 = { version = "0.21", default-features = false, features = ["std"] }
26+
lazy_static = "1.5.0"
27+
prometheus = "0.14.0"
2628

2729
# Required for RabbitMQ-based EventPublisher. Only enabled for the `events-rabbitmq` feature.
2830
lapin = { version = "2.4.0", features = ["rustls"], default-features = false, optional = true }

ldk-server/src/api/error.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,15 @@ impl From<NodeError> for LdkServerError {
131131
LdkServerError::new(error_code, message)
132132
}
133133
}
134+
135+
impl From<prometheus::Error> for LdkServerError {
136+
fn from(e: prometheus::Error) -> Self {
137+
LdkServerError::new(LdkServerErrorCode::InternalServerError, e.to_string())
138+
}
139+
}
140+
141+
impl From<std::string::FromUtf8Error> for LdkServerError {
142+
fn from(e: std::string::FromUtf8Error) -> Self {
143+
LdkServerError::new(LdkServerErrorCode::InternalServerError, e.to_string())
144+
}
145+
}

ldk-server/src/api/metrics.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// This file is Copyright its original authors, visible in version control
2+
// history.
3+
//
4+
// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
5+
// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
7+
// You may not use this file except in accordance with one or both of these
8+
// licenses.
9+
10+
use crate::api::error::LdkServerError;
11+
use crate::util::metrics::METRICS;
12+
13+
pub(crate) fn handle_metrics_request() -> Result<String, LdkServerError> {
14+
match METRICS.gather_metrics() {
15+
Ok(metrics) => Ok(metrics),
16+
Err(e) => Err(e),
17+
}
18+
}

ldk-server/src/api/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub(crate) mod get_payment_details;
2626
pub(crate) mod list_channels;
2727
pub(crate) mod list_forwarded_payments;
2828
pub(crate) mod list_payments;
29+
pub(crate) mod metrics;
2930
pub(crate) mod onchain_receive;
3031
pub(crate) mod onchain_send;
3132
pub(crate) mod open_channel;

ldk-server/src/main.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use crate::io::persist::{
5050
use crate::service::NodeService;
5151
use crate::util::config::{load_config, ChainSource};
5252
use crate::util::logger::ServerLogger;
53+
use crate::util::metrics::{BUILD_METRICS_INTERVAL, METRICS};
5354
use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
5455
use crate::util::tls::get_or_generate_tls_config;
5556

@@ -291,6 +292,16 @@ fn main() {
291292
}
292293
};
293294
let event_node = Arc::clone(&node);
295+
296+
let metrics_node = Arc::clone(&node);
297+
let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
298+
runtime.spawn(async move {
299+
loop {
300+
interval.tick().await;
301+
METRICS.update_service_health_score(&metrics_node);
302+
}
303+
});
304+
294305
let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr)
295306
.await
296307
.expect("Failed to bind listening port");

ldk-server/src/service.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ use ldk_node::Node;
2121
use ldk_server_protos::endpoints::{
2222
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
2323
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH,
24-
GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH,
25-
LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SPLICE_IN_PATH,
26-
SPLICE_OUT_PATH, UPDATE_CHANNEL_CONFIG_PATH,
24+
GET_METRICS_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH,
25+
LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH,
26+
OPEN_CHANNEL_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, UPDATE_CHANNEL_CONFIG_PATH,
2727
};
2828
use prost::Message;
2929

@@ -41,6 +41,7 @@ use crate::api::get_payment_details::handle_get_payment_details_request;
4141
use crate::api::list_channels::handle_list_channels_request;
4242
use crate::api::list_forwarded_payments::handle_list_forwarded_payments_request;
4343
use crate::api::list_payments::handle_list_payments_request;
44+
use crate::api::metrics::handle_metrics_request;
4445
use crate::api::onchain_receive::handle_onchain_receive_request;
4546
use crate::api::onchain_send::handle_onchain_send_request;
4647
use crate::api::open_channel::handle_open_channel;
@@ -148,6 +149,25 @@ impl Service<Request<Incoming>> for NodeService {
148149
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
149150

150151
fn call(&self, req: Request<Incoming>) -> Self::Future {
152+
// Handle metrics endpoint separately to bypass auth and return plain text
153+
if req.uri().path().len() > 1 && &req.uri().path()[1..] == GET_METRICS_PATH {
154+
return Box::pin(async move {
155+
match handle_metrics_request() {
156+
Ok(metrics) => Ok(Response::builder()
157+
.header("Content-Type", "text/plain")
158+
.body(Full::new(Bytes::from(metrics)))
159+
.unwrap()),
160+
Err(e) => {
161+
let (error_response, status_code) = to_error_response(e);
162+
Ok(Response::builder()
163+
.status(status_code)
164+
.body(Full::new(Bytes::from(error_response.encode_to_vec())))
165+
.unwrap())
166+
},
167+
}
168+
});
169+
}
170+
151171
// Extract auth params from headers (validation happens after body is read)
152172
let auth_params = match extract_auth_params(&req) {
153173
Ok(params) => params,

0 commit comments

Comments
 (0)