Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
<PackageVersion Include="Microsoft.IdentityModel.Protocols.OpenIdConnect" Version="8.6.1" />
<PackageVersion Include="Microsoft.IdentityModel.Validators" Version="8.6.1" />
<PackageVersion Include="OpenTelemetry" Version="1.13.1" />
<PackageVersion Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" Version="1.13.1" />
<PackageVersion Include="StackExchange.Redis" Version="2.9.25" />
<PackageVersion Include="System.IdentityModel.Tokens.Jwt" Version="8.6.1" />
<PackageVersion Include="System.Interactive.Async" Version="6.0.1" />
Expand Down
19 changes: 19 additions & 0 deletions libs/host/Configuration/Options.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
using Garnet.server.Auth.Settings;
using Garnet.server.TLS;
using Microsoft.Extensions.Logging;
using OpenTelemetry.Exporter;
using Tsavorite.core;
using Tsavorite.devices;

Expand Down Expand Up @@ -363,6 +364,20 @@ internal sealed class Options : ICloneable
[Option("metrics-sampling-freq", Required = false, HelpText = "Metrics sampling frequency in seconds. Value of 0 disables metrics monitor task.")]
public int MetricsSamplingFrequency { get; set; }

// Destination for exported OpenTelemetry metrics. Leaving this unset (null) means no metrics are exported.
[Option("opentelemetry-endpoint", Required = false, HelpText = "The endpoint to which OpenTelemetry metrics will be exported. If null, OpenTelemetry metrics will not be exported.")]
public Uri OpenTelemetryEndpoint { get; set; }

// Export interval in milliseconds; 0 is a sentinel meaning "keep the exporter's default interval".
// NOTE(review): the range attribute presumably rejects negative values - confirm against IntRangeValidation.
[IntRangeValidation(0, int.MaxValue, isRequired: false)]
[Option("opentelemetry-export-interval", Required = false, HelpText = "The interval in milliseconds to export OpenTelemetry metrics. If 0, the default interval of 60 seconds will be used.")]
public int OpenTelemetryExportInterval { get; set; }

// Wire protocol used for export; nullable so that "not specified" can fall through to the exporter's default.
[Option("opentelemetry-export-protocol", Required = false, HelpText = "The protocol to use when exporting OpenTelemetry metrics. Value options: Grpc, HttpProtobuf. If null, the default protocol will be used.")]
public OtlpExportProtocol? OpenTelemetryExportProtocol { get; set; }

// Export timeout in milliseconds; 0 is a sentinel meaning "keep the exporter's default timeout".
// NOTE(review): the range attribute presumably rejects negative values - confirm against IntRangeValidation.
[IntRangeValidation(0, int.MaxValue, isRequired: false)]
[Option("opentelemetry-export-timeout", Required = false, HelpText = "The timeout in milliseconds when exporting OpenTelemetry metrics. If 0, the default timeout of 10 seconds will be used.")]
public int OpenTelemetryExportTimeout { get; set; }

[OptionValidation]
[Option('q', Required = false, HelpText = "Enabling quiet mode does not print server version and text art.")]
public bool? QuietMode { get; set; }
Expand Down Expand Up @@ -914,6 +929,10 @@ public GarnetServerOptions GetServerOptions(ILogger logger = null)
SlowLogThreshold = SlowLogThreshold,
SlowLogMaxEntries = SlowLogMaxEntries,
MetricsSamplingFrequency = MetricsSamplingFrequency,
OpenTelemetryEndpoint = OpenTelemetryEndpoint,
OpenTelemetryExportInterval = OpenTelemetryExportInterval,
OpenTelemetryExportProtocol = OpenTelemetryExportProtocol,
OpenTelemetryExportTimeout = OpenTelemetryExportTimeout,
LogLevel = LogLevel,
LoggingFrequency = LoggingFrequency,
QuietMode = QuietMode.GetValueOrDefault(),
Expand Down
13 changes: 12 additions & 1 deletion libs/host/defaults.conf
Original file line number Diff line number Diff line change
Expand Up @@ -456,5 +456,16 @@
"ClusterReplicaResumeWithData": false,

/* Enable Vector Sets (preview) - this feature (and associated commands) are incomplete, unstable, and subject to change while still in preview */
"EnableVectorSetPreview": false
"EnableVectorSetPreview": false,

/* Disable OpenTelemetry metrics reporting by default */
"OpenTelemetryEndpoint": null,

/* OpenTelemetry metrics export interval in milliseconds; 0 uses the OpenTelemetry SDK default (60 seconds) */
"OpenTelemetryExportInterval": 0,

/* OpenTelemetry export protocol (Grpc or HttpProtobuf); null uses the OpenTelemetry SDK default */
"OpenTelemetryExportProtocol": null,
/* OpenTelemetry export timeout in milliseconds; 0 uses the OpenTelemetry SDK default (10 seconds) */
"OpenTelemetryExportTimeout": 0
}
1 change: 1 addition & 0 deletions libs/server/Garnet.server.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<PackageReference Include="Microsoft.IdentityModel.Validators" />
<PackageReference Include="Microsoft.Extensions.Logging" />
<PackageReference Include="Microsoft.IdentityModel.Protocols.OpenIdConnect" />
<PackageReference Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" />
<PackageReference Include="System.IdentityModel.Tokens.Jwt" />
<PackageReference Include="System.Numerics.Tensors" />
<PackageReference Include="KeraLua" />
Expand Down
60 changes: 60 additions & 0 deletions libs/server/Metrics/GarnetOpenTelemetryServerMetrics.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System;
using System.Diagnostics.Metrics;

namespace Garnet.server.Metrics
{
/// <summary>
/// Publishes Garnet server connection statistics through a <see cref="Meter"/> so that the
/// OpenTelemetry SDK, or any other <see cref="MeterListener"/>, can observe them.
/// Command-rate and network-rate values are intentionally not published here because they can be
/// derived from the other exposed metrics.
/// </summary>
/// <remarks>
/// NOTE(review): the observation callbacks close over the constructor parameter. If
/// <see cref="GarnetServerMetrics"/> is a value type they would read a copy taken at construction
/// time rather than live counters - confirm it is a reference type.
/// </remarks>
internal sealed class GarnetOpenTelemetryServerMetrics : IDisposable
{
    /// <summary>
    /// Name of the <see cref="Meter"/> under which the server instruments are registered.
    /// </summary>
    public const string MeterName = "Microsoft.Garnet.Server";

    private readonly Meter meter;

    /// <summary>
    /// Creates the meter and registers one observable gauge (active connections) and two
    /// observable counters (connections received, connections disposed).
    /// </summary>
    /// <param name="serverMetrics">
    /// The <see cref="GarnetServerMetrics"/> whose connection counters are read each time an
    /// instrument is observed.
    /// </param>
    internal GarnetOpenTelemetryServerMetrics(GarnetServerMetrics serverMetrics)
    {
        this.meter = new Meter(MeterName);

        // Gauge: the value can go up and down as clients connect and disconnect.
        this.meter.CreateObservableGauge(
            name: "garnet.server.connections.active",
            observeValue: () => serverMetrics.total_connections_active,
            unit: "{connection}",
            description: "Number of currently active client connections.");

        // Counters: running totals.
        this.meter.CreateObservableCounter(
            name: "garnet.server.connections.received",
            observeValue: () => serverMetrics.total_connections_received,
            unit: "{connection}",
            description: "Total number of client connections received.");

        this.meter.CreateObservableCounter(
            name: "garnet.server.connections.disposed",
            observeValue: () => serverMetrics.total_connections_disposed,
            unit: "{connection}",
            description: "Total number of client connections disposed.");
    }

    /// <inheritdoc />
    public void Dispose() => this.meter.Dispose();
}
}
117 changes: 117 additions & 0 deletions libs/server/Metrics/GarnetOpenTelemetryServerMonitor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System;
using System.Reflection;
using Garnet.server.Metrics.Latency;
using OpenTelemetry;
using OpenTelemetry.Metrics;
using OpenTelemetry.Resources;

namespace Garnet.server.Metrics
{
/// <summary>
/// Registers OpenTelemetry metrics for Garnet server and manages their lifecycle. This includes server-level metrics, session-level metrics, and latency metrics.
/// </summary>
/// <remarks>
/// <para>
/// This class acts as the central coordinator for all OpenTelemetry metrics in the Garnet server.
/// It wraps the raw metrics sources (<see cref="GarnetServerMetrics"/> and <see cref="GarnetSessionMetrics"/>)
/// into OpenTelemetry-compatible instruments exposed via <see cref="System.Diagnostics.Metrics.Meter"/> instances:
/// </para>
/// <list type="bullet">
/// <item><description><see cref="GarnetOpenTelemetryServerMetrics"/> — connection-level metrics (active, received, disposed).</description></item>
/// <item><description><see cref="GarnetOpenTelemetrySessionMetrics"/> — session-level metrics (commands processed, network I/O, cache lookups).</description></item>
/// <item><description><see cref="GarnetOpenTelemetryLatencyMetrics"/> — latency histograms and counters (command latency, bytes/ops per receive call).</description></item>
/// </list>
/// <para>
/// Call <see cref="Start"/> after construction to configure the OTLP exporter and begin metric collection.
/// The exporter endpoint, protocol and timeout, and the metric reader's export interval, are controlled
/// by the corresponding properties on <see cref="GarnetServerOptions"/>.
/// </para>
/// <para>
/// This class implements <see cref="IDisposable"/>; disposing it shuts down the meter provider (if one
/// was started) and then tears down all underlying meters and the latency metrics singleton.
/// </para>
/// </remarks>
internal sealed class GarnetOpenTelemetryServerMonitor : IDisposable
{
    private readonly GarnetServerOptions options;
    private readonly GarnetOpenTelemetryServerMetrics serverMetrics;
    private readonly GarnetOpenTelemetrySessionMetrics sessionMetrics;
    private MeterProvider meterProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="GarnetOpenTelemetryServerMonitor"/> class,
    /// creating the OpenTelemetry metric wrappers for server and session metrics and initializing
    /// the latency metrics singleton.
    /// </summary>
    /// <param name="options">
    /// The <see cref="GarnetServerOptions"/> that control OpenTelemetry export behavior, including
    /// <see cref="GarnetServerOptions.OpenTelemetryEndpoint"/>,
    /// <see cref="GarnetServerOptions.OpenTelemetryExportProtocol"/>,
    /// <see cref="GarnetServerOptions.OpenTelemetryExportTimeout"/> and
    /// <see cref="GarnetServerOptions.OpenTelemetryExportInterval"/>
    /// </param>
    /// <param name="serverMetrics">
    /// The <see cref="GarnetServerMetrics"/> instance that provides raw server and session counters.
    /// If <see cref="GarnetServerMetrics.globalSessionMetrics"/> is <c>null</c>, session-level
    /// metrics will not be registered.
    /// </param>
    public GarnetOpenTelemetryServerMonitor(GarnetServerOptions options, GarnetServerMetrics serverMetrics)
    {
        this.options = options;
        this.serverMetrics = new GarnetOpenTelemetryServerMetrics(serverMetrics);
        this.sessionMetrics = serverMetrics.globalSessionMetrics != null
            ? new GarnetOpenTelemetrySessionMetrics(serverMetrics.globalSessionMetrics)
            : null;

        GarnetOpenTelemetryLatencyMetrics.Initialize(options.LatencyMonitor);
    }

    /// <summary>
    /// Initializes and configures OpenTelemetry metrics exporting if an endpoint is specified in the options.
    /// </summary>
    /// <remarks>
    /// Call this method to enable OpenTelemetry metrics collection and exporting for the service.
    /// Metrics are exported using the configured endpoint and protocol. If no endpoint is specified,
    /// or if exporting has already been started, the call does nothing.
    /// </remarks>
    public void Start()
    {
        // No endpoint means export is disabled. An existing provider means Start was already called;
        // building a second one would leak the first and export every metric twice.
        if (this.options.OpenTelemetryEndpoint == null || this.meterProvider != null)
        {
            return;
        }

        this.meterProvider = Sdk.CreateMeterProviderBuilder()
            .ConfigureResource(rb => rb.AddService("Microsoft.Garnet", serviceVersion: Assembly.GetEntryAssembly()?.GetName()?.Version?.ToString() ?? "unknown"))
            .AddMeter(GarnetOpenTelemetryServerMetrics.MeterName, GarnetOpenTelemetrySessionMetrics.MeterName, GarnetOpenTelemetryLatencyMetrics.MeterName)
            .AddOtlpExporter((exporterOptions, metricReaderOptions) =>
            {
                // NOTE(review): with HttpProtobuf, an endpoint assigned in code is expected to be used
                // as-is (no signal path such as /v1/metrics appended) - confirm against the exporter docs.
                exporterOptions.Endpoint = this.options.OpenTelemetryEndpoint;

                if (this.options.OpenTelemetryExportProtocol.HasValue)
                {
                    exporterOptions.Protocol = this.options.OpenTelemetryExportProtocol.Value;
                }

                // 0 is the "use SDK default" sentinel for both timeout and interval.
                if (this.options.OpenTelemetryExportTimeout != 0)
                {
                    exporterOptions.TimeoutMilliseconds = this.options.OpenTelemetryExportTimeout;
                }

                if (this.options.OpenTelemetryExportInterval != 0)
                {
                    // The metrics export cadence is owned by the periodic metric reader.
                    // OtlpExporterOptions.BatchExportProcessorOptions only applies to the trace
                    // batch processor and is ignored by the metrics pipeline.
                    metricReaderOptions.PeriodicExportingMetricReaderOptions.ExportIntervalMilliseconds = this.options.OpenTelemetryExportInterval;
                }
            })
            .Build();
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Shut the provider down first, while the instruments are still registered, so that any
        // final collection/export performed during provider shutdown can still observe them.
        this.meterProvider?.Dispose();
        this.meterProvider = null;

        this.serverMetrics.Dispose();
        this.sessionMetrics?.Dispose();
        GarnetOpenTelemetryLatencyMetrics.DisposeInstance();
    }
}
}
127 changes: 127 additions & 0 deletions libs/server/Metrics/GarnetOpenTelemetrySessionMetrics.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System;
using System.Diagnostics.Metrics;

namespace Garnet.server.Metrics
{
/// <summary>
/// Publishes aggregated Garnet session statistics as OpenTelemetry instruments on a <see cref="Meter"/>.
/// The registered observable counters and gauge cover command processing, network I/O, cache lookups,
/// pending operations and RESP session exceptions, all read from a <see cref="GarnetSessionMetrics"/> instance.
/// </summary>
internal sealed class GarnetOpenTelemetrySessionMetrics : IDisposable
{
    /// <summary>
    /// Name of the <see cref="Meter"/> under which the session instruments are registered.
    /// </summary>
    public const string MeterName = "Microsoft.Garnet.Server.Session";

    /// <summary>
    /// The <see cref="Meter"/> that owns every instrument created by this class.
    /// </summary>
    private readonly Meter meter;

    /// <summary>
    /// Creates the meter and registers the session-level observable instruments, each of which
    /// reads its value from <paramref name="globalSessionMetrics"/> when observed.
    /// </summary>
    /// <param name="globalSessionMetrics">
    /// The <see cref="GarnetSessionMetrics"/> instance supplying the aggregated session statistics.
    /// Must not be <see langword="null"/>.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="globalSessionMetrics"/> is <see langword="null"/>.
    /// </exception>
    internal GarnetOpenTelemetrySessionMetrics(GarnetSessionMetrics globalSessionMetrics)
    {
        var stats = globalSessionMetrics ?? throw new ArgumentNullException(nameof(globalSessionMetrics));

        meter = new Meter(MeterName);

        // Command counters.
        AddCounter("garnet.server.commands.processed", "{command}", "Total number of commands processed.",
            () => Convert.ToInt64(stats.get_total_commands_processed()));
        AddCounter("garnet.server.transaction.commands.received", "{command}", "Total number of transaction commands received.",
            () => Convert.ToInt64(stats.get_total_transaction_commands_received()));
        AddCounter("garnet.server.transaction.commands.failed", "{command}", "Total number of transaction command executions that failed.",
            () => Convert.ToInt64(stats.get_total_transaction_commands_execution_failed()));
        AddCounter("garnet.server.write.commands.processed", "{command}", "Total number of write commands processed.",
            () => Convert.ToInt64(stats.get_total_write_commands_processed()));
        AddCounter("garnet.server.read.commands.processed", "{command}", "Total number of read commands processed.",
            () => Convert.ToInt64(stats.get_total_read_commands_processed()));
        AddCounter("garnet.server.cluster.commands.processed", "{command}", "Total number of cluster commands processed.",
            () => Convert.ToInt64(stats.get_total_cluster_commands_processed()));

        // Network byte counters.
        AddCounter("garnet.server.network.bytes.received", "By", "Total number of bytes received from the network.",
            () => Convert.ToInt64(stats.get_total_net_input_bytes()));
        AddCounter("garnet.server.network.bytes.sent", "By", "Total number of bytes sent to the network.",
            () => Convert.ToInt64(stats.get_total_net_output_bytes()));

        // Cache lookups: the total is found + not-found; misses are reported separately.
        AddCounter("garnet.server.cache.lookups", "{lookup}", "Total number of cache lookups.",
            () => Convert.ToInt64(stats.get_total_found()) + Convert.ToInt64(stats.get_total_notfound()));
        AddCounter("garnet.server.cache.lookups.missed", "{miss}", "Total number of cache misses (unsuccessful key lookups).",
            () => Convert.ToInt64(stats.get_total_notfound()));

        // Pending operations is the only gauge: it reports a current value, not a running total.
        meter.CreateObservableGauge(
            "garnet.server.operations.pending",
            () => Convert.ToInt64(stats.get_total_pending()),
            unit: "{operation}",
            description: "Current number of pending operations.");

        AddCounter("garnet.server.resp.session.exceptions", "{exception}", "Total number of RESP server session exceptions.",
            () => Convert.ToInt64(stats.get_total_number_resp_server_session_exceptions()));

        // Registers one observable Int64 counter on the meter.
        void AddCounter(string name, string unit, string description, Func<long> observe)
            => meter.CreateObservableCounter(name, observe, unit: unit, description: description);
    }

    /// <inheritdoc />
    public void Dispose() => meter.Dispose();
}
}
2 changes: 2 additions & 0 deletions libs/server/Metrics/Info/GarnetInfoMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ private void PopulateServerInfo(StoreWrapper storeWrapper)
new("uptime_in_days", ((int)uptime.TotalDays).ToString()),
new("monitor_task", storeWrapper.serverOptions.MetricsSamplingFrequency > 0 ? "enabled" : "disabled"),
new("monitor_freq", storeWrapper.serverOptions.MetricsSamplingFrequency.ToString()),
new("otel_export", storeWrapper.serverOptions.OpenTelemetryEndpoint != null ? "enabled" : "disabled"),
new("otel_endpoint", storeWrapper.serverOptions.OpenTelemetryEndpoint?.ToString() ?? "-"),
new("latency_monitor", storeWrapper.serverOptions.LatencyMonitor ? "enabled" : "disabled"),
new("run_id", storeWrapper.RunId),
new("redis_version", storeWrapper.redisProtocolVersion),
Expand Down
Loading