From f22e5b0f85ac0a667b1fab712abb8112d1dbc465 Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Tue, 16 Jan 2024 10:44:26 +0200 Subject: [PATCH] Critical dependency logging (#4988) * add metrics layer * add metrics * simplify getting the target * make clippy happy * fix typos * unify deps under workspace * make import statement shorter, fix typos * enable warn by default, mark flag as deprecated * do not exit on error when initializing logging fails * revert exit on error * adjust bootnode logging * add logging layer * non blocking file writer * non blocking file writer * add tracing visitor * use target as is by default * make libp2p events register correctly * adjust repilcated cli help * refactor tracing layer * linting * filesize * log gossipsub, dont filter by log level * turn on debug logs by default, remove deprecation warning * truncate file, add timestamp, add unit test * suppress output (#5) * use tracing appender * cleanup * Add a task to remove old log files and upgrade to warn level * Add the time feature for tokio * Udeps and fmt --------- Co-authored-by: Diva M Co-authored-by: Divma <26765164+divagant-martian@users.noreply.github.com> Co-authored-by: Age Manning --- Cargo.lock | 48 +++++++- Cargo.toml | 3 +- common/directory/src/lib.rs | 1 + common/logging/Cargo.toml | 4 +- common/logging/src/lib.rs | 48 ++++++++ common/logging/src/tracing_logging_layer.rs | 115 ++++++++++++++++++++ lighthouse/src/main.rs | 54 ++++----- 7 files changed, 238 insertions(+), 35 deletions(-) create mode 100644 common/logging/src/tracing_logging_layer.rs diff --git a/Cargo.lock b/Cargo.lock index 532388cf4c7..306580e4303 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4793,6 +4793,8 @@ dependencies = [ "sloggers", "take_mut", "tokio", + "tracing", + "tracing-appender", "tracing-core", "tracing-log", "tracing-subscriber", @@ -4857,6 +4859,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "matches" version = "0.1.10" @@ -6406,8 +6417,17 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -6418,9 +6438,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -8190,6 +8216,18 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" +dependencies = [ + "crossbeam-channel", + "thiserror", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.27" @@ -8238,10 +8276,14 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ + "matchers", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] diff --git a/Cargo.toml b/Cargo.toml index f43d375f0d9..a3e1430f3fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,9 +160,10 @@ tempfile = "3" tokio = { version = "1", features = ["rt-multi-thread", "sync", "signal"] } tokio-stream = { version = "0.1", features = ["sync"] } tokio-util = { version = "0.6", features = ["codec", "compat", "time"] } +tracing-appender = "0.2" tracing-core = "0.1" tracing-log = "0.2" -tracing-subscriber = "0.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } tree_hash = "0.5" tree_hash_derive = "0.5" url = "2" diff --git a/common/directory/src/lib.rs b/common/directory/src/lib.rs index 62b98aab94e..e8585c504a0 100644 --- a/common/directory/src/lib.rs +++ b/common/directory/src/lib.rs @@ -10,6 +10,7 @@ pub const DEFAULT_NETWORK_DIR: &str = "network"; pub const DEFAULT_VALIDATOR_DIR: &str = "validators"; pub const DEFAULT_SECRET_DIR: &str = "secrets"; pub const DEFAULT_WALLET_DIR: &str = "wallets"; +pub const DEFAULT_TRACING_DIR: &str = "tracing"; /// Base directory name for unnamed testnets passed through the --testnet-dir flag pub const CUSTOM_TESTNET_DIR: &str = "custom"; diff --git a/common/logging/Cargo.toml b/common/logging/Cargo.toml index 6975711651b..1fad56d475c 100644 --- a/common/logging/Cargo.toml +++ b/common/logging/Cargo.toml @@ -19,7 +19,9 @@ slog-async = { workspace = true } slog-term = { workspace = true } sloggers = { workspace = true } take_mut = "0.2.2" -tokio = { workspace = true } +tokio = { workspace = true, features = [ "time" ] } +tracing = "0.1" tracing-core = { workspace = true } tracing-log = { workspace = true } tracing-subscriber = { workspace = true } +tracing-appender = { workspace = true } diff --git a/common/logging/src/lib.rs b/common/logging/src/lib.rs index 80fc00a00f2..5217d541cf5 100644 --- a/common/logging/src/lib.rs +++ b/common/logging/src/lib.rs @@ -7,15 +7,21 @@ use lighthouse_metrics::{ use slog::Logger; use slog_term::Decorator; use std::io::{Result, Write}; +use std::path::PathBuf; use std::time::{Duration, Instant}; +use tracing_appender::non_blocking::NonBlocking; +use tracing_logging_layer::LoggingLayer; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; pub const MAX_MESSAGE_WIDTH: usize = 40; pub mod async_record; mod sse_logging_components; +mod tracing_logging_layer; mod tracing_metrics_layer; pub use sse_logging_components::SSELoggingComponents; +pub use tracing_logging_layer::cleanup_logging_task; pub use tracing_metrics_layer::MetricsLayer; /// The minimum interval between log messages indicating that a queue is full. @@ -217,6 +223,48 @@ impl TimeLatch { } } +pub fn create_tracing_layer(base_tracing_log_path: PathBuf, turn_on_terminal_logs: bool) { + let filter_layer = match tracing_subscriber::EnvFilter::try_from_default_env() + .or_else(|_| tracing_subscriber::EnvFilter::try_new("warn")) + { + Ok(filter) => filter, + Err(e) => { + eprintln!("Failed to initialize dependency logging {e}"); + return; + } + }; + + let libp2p_writer = + tracing_appender::rolling::daily(base_tracing_log_path.clone(), "libp2p.log"); + let discv5_writer = + tracing_appender::rolling::daily(base_tracing_log_path.clone(), "discv5.log"); + + let (libp2p_non_blocking_writer, libp2p_guard) = NonBlocking::new(libp2p_writer); + let (discv5_non_blocking_writer, discv5_guard) = NonBlocking::new(discv5_writer); + + let custom_layer = LoggingLayer { + libp2p_non_blocking_writer, + libp2p_guard, + discv5_non_blocking_writer, + discv5_guard, + }; + + if let Err(e) = tracing_subscriber::fmt() + .with_env_filter(filter_layer) + .with_writer(move || { + tracing_subscriber::fmt::writer::OptionalWriter::::from( + turn_on_terminal_logs.then(std::io::stdout), + ) + }) + .finish() + .with(MetricsLayer) + .with(custom_layer) + .try_init() + { + eprintln!("Failed to initialize dependency logging {e}"); + } +} + /// Return a logger suitable for test usage. /// /// By default no logs will be printed, but they can be enabled via diff --git a/common/logging/src/tracing_logging_layer.rs b/common/logging/src/tracing_logging_layer.rs new file mode 100644 index 00000000000..a74e24bdb69 --- /dev/null +++ b/common/logging/src/tracing_logging_layer.rs @@ -0,0 +1,115 @@ +use chrono::{naive::Days, prelude::*}; +use slog::{debug, warn}; +use std::io::Write; +use tracing::Subscriber; +use tracing_appender::non_blocking::{NonBlocking, WorkerGuard}; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +pub struct LoggingLayer { + pub libp2p_non_blocking_writer: NonBlocking, + pub libp2p_guard: WorkerGuard, + pub discv5_non_blocking_writer: NonBlocking, + pub discv5_guard: WorkerGuard, +} + +impl Layer for LoggingLayer +where + S: Subscriber, +{ + fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context) { + let meta = event.metadata(); + let log_level = meta.level(); + let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); + + let target = match meta.target().split_once("::") { + Some((crate_name, _)) => crate_name, + None => "unknown", + }; + + let mut writer = match target { + "libp2p_gossipsub" => self.libp2p_non_blocking_writer.clone(), + "discv5" => self.discv5_non_blocking_writer.clone(), + _ => return, + }; + + let mut visitor = LogMessageExtractor { + message: String::default(), + }; + + event.record(&mut visitor); + let message = format!("{} {} {}\n", timestamp, log_level, visitor.message); + + if let Err(e) = writer.write_all(message.as_bytes()) { + eprintln!("Failed to write log: {}", e); + } + } +} + +struct LogMessageExtractor { + message: String, +} + +impl tracing_core::field::Visit for LogMessageExtractor { + fn record_debug(&mut self, _: &tracing_core::Field, value: &dyn std::fmt::Debug) { + self.message = format!("{} {:?}", self.message, value); + } +} + +/// Creates a long lived async task that routinely deletes old tracing log files +pub async fn cleanup_logging_task(path: std::path::PathBuf, log: slog::Logger) { + loop { + // Delay for 1 day and then prune old logs + tokio::time::sleep(std::time::Duration::from_secs(60 * 60 * 24)).await; + + let Some(yesterday_date) = chrono::prelude::Local::now() + .naive_local() + .checked_sub_days(Days::new(1)) + else { + warn!(log, "Could not calculate the current date"); + return; + }; + + // Search for old log files + let dir = path.as_path(); + + if dir.is_dir() { + let Ok(files) = std::fs::read_dir(dir) else { + warn!(log, "Could not read log directory contents"; "path" => ?dir); + break; + }; + + for file in files { + let Ok(dir_entry) = file else { + warn!(log, "Could not read file"); + continue; + }; + + let Ok(file_name) = dir_entry.file_name().into_string() else { + warn!(log, "Could not read file"; "file" => ?dir_entry); + continue; + }; + + if file_name.starts_with("libp2p.log") | file_name.starts_with("discv5.log") { + let log_file_date = file_name.split('.').collect::>(); + if log_file_date.len() == 3 { + let Ok(log_file_date_type) = + NaiveDate::parse_from_str(log_file_date[2], "%Y-%m-%d") + else { + warn!(log, "Could not parse log file date"; "file" => file_name); + continue; + }; + + if log_file_date_type < yesterday_date.into() { + // Delete the file, its too old + debug!(log, "Removing old log file"; "file" => &file_name); + if let Err(e) = std::fs::remove_file(dir_entry.path()) { + warn!(log, "Failed to remove log file"; "file" => file_name, "error" => %e); + } + } + } + } + } + } + } +} diff --git a/lighthouse/src/main.rs b/lighthouse/src/main.rs index ccdfa60f114..b8cedfde0d7 100644 --- a/lighthouse/src/main.rs +++ b/lighthouse/src/main.rs @@ -14,7 +14,6 @@ use slog::{crit, info}; use std::path::PathBuf; use std::process::exit; use task_executor::ShutdownReason; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use types::{EthSpec, EthSpecId}; use validator_client::ProductionValidatorClient; @@ -365,34 +364,6 @@ fn main() { } } - // read the `RUST_LOG` statement - let filter_layer = match tracing_subscriber::EnvFilter::try_from_default_env() - .or_else(|_| tracing_subscriber::EnvFilter::try_new("debug")) - { - Ok(filter) => filter, - Err(e) => { - eprintln!("Failed to initialize dependency logging {e}"); - exit(1) - } - }; - - let turn_on_terminal_logs = matches.is_present("env_log"); - - if let Err(e) = tracing_subscriber::fmt() - .with_env_filter(filter_layer) - .with_writer(move || { - tracing_subscriber::fmt::writer::OptionalWriter::::from( - turn_on_terminal_logs.then(std::io::stdout), - ) - }) - .finish() - .with(logging::MetricsLayer) - .try_init() - { - eprintln!("Failed to initialize dependency logging {e}"); - exit(1) - } - let result = get_eth2_network_config(&matches).and_then(|eth2_network_config| { let eth_spec_id = eth2_network_config.eth_spec_id()?; @@ -534,7 +505,7 @@ fn run( }; let logger_config = LoggerConfig { - path: log_path, + path: log_path.clone(), debug_level: String::from(debug_level), logfile_debug_level: String::from(logfile_debug_level), log_format: log_format.map(String::from), @@ -557,6 +528,29 @@ fn run( let log = environment.core_context().log().clone(); + let mut tracing_log_path: Option = clap_utils::parse_optional(matches, "logfile")?; + + if tracing_log_path.is_none() { + tracing_log_path = Some( + parse_path_or_default(matches, "datadir")? + .join(DEFAULT_BEACON_NODE_DIR) + .join("logs"), + ) + } + + let path = tracing_log_path.clone().unwrap(); + + let turn_on_terminal_logs = matches.is_present("env_log"); + + // Run a task to clean up old tracing logs. + let log_cleaner_context = environment.service_context("log_cleaner".to_string()); + log_cleaner_context.executor.spawn( + logging::cleanup_logging_task(path.clone(), log.clone()), + "log_cleaner", + ); + + logging::create_tracing_layer(path, turn_on_terminal_logs); + // Allow Prometheus to export the time at which the process was started. metrics::expose_process_start_time(&log);