Skip to content

SOLR-17806: Migrate ADMIN node registry metrics to OTEL #3444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 8 commits on Aug 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions solr/core/src/java/org/apache/solr/core/CoreContainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
import static org.apache.solr.metrics.SolrMetricProducer.HANDLER_ATTR;
import static org.apache.solr.search.SolrIndexSearcher.EXECUTOR_MAX_CPU_THREADS;
import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;

Expand Down Expand Up @@ -537,9 +538,10 @@ private void initializeAuditloggerPlugin(Map<String, Object> auditConf) {
newVersion, getResourceLoader().newInstance(klas, AuditLoggerPlugin.class));

newAuditloggerPlugin.plugin.init(auditConf);
// TODO SOLR-17458: Add Otel
newAuditloggerPlugin.plugin.initializeMetrics(
solrMetricsContext, Attributes.empty(), "/auditlogging");
solrMetricsContext,
Attributes.builder().put(HANDLER_ATTR, "/auditlogging").build(),
"/auditlogging");
} else {
log.debug("Security conf doesn't exist. Skipping setup for audit logging module.");
}
Expand Down Expand Up @@ -604,9 +606,10 @@ private synchronized void initializeAuthenticationPlugin(
if (authenticationPlugin != null) {
authenticationPlugin.plugin.init(authenticationConfig);
setupHttpClientForAuthPlugin(authenticationPlugin.plugin);
// TODO SOLR-17458: Add Otel
authenticationPlugin.plugin.initializeMetrics(
solrMetricsContext, Attributes.empty(), "/authentication");
solrMetricsContext,
Attributes.builder().put(HANDLER_ATTR, "/authentication").build(),
"/authentication");
}
this.authenticationPlugin = authenticationPlugin;
try {
Expand Down Expand Up @@ -861,14 +864,14 @@ private void loadInternal() {
shardHandlerFactory =
ShardHandlerFactory.newInstance(cfg.getShardHandlerFactoryPluginInfo(), loader);
if (shardHandlerFactory instanceof SolrMetricProducer metricProducer) {
// TODO SOLR-17458: Add Otel
// NOCOMMIT SOLR-17458: Add Otel
metricProducer.initializeMetrics(solrMetricsContext, Attributes.empty(), "httpShardHandler");
}

updateShardHandler = new UpdateShardHandler(cfg.getUpdateShardHandlerConfig());
solrClientProvider =
new HttpSolrClientProvider(cfg.getUpdateShardHandlerConfig(), solrMetricsContext);
// TODO SOLR-17458: Add Otel
// NOCOMMIT SOLR-17458: Add Otel
updateShardHandler.initializeMetrics(
solrMetricsContext, Attributes.empty(), "updateShardHandler");
solrClientCache = new SolrClientCache(solrClientProvider.getSolrClient());
Expand All @@ -881,7 +884,7 @@ private void loadInternal() {
for (Map.Entry<String, CacheConfig> e : cachesConfig.entrySet()) {
SolrCache<?, ?> c = e.getValue().newInstance();
String cacheName = e.getKey();
// TODO SOLR-17458: Add Otel
// NOCOMMIT SOLR-17458: Add Otel
c.initializeMetrics(solrMetricsContext, Attributes.empty(), "nodeLevelCache/" + cacheName);
m.put(cacheName, c);
}
Expand All @@ -896,7 +899,7 @@ private void loadInternal() {
if (isZooKeeperAware()) {
solrClientCache.setDefaultZKHost(getZkController().getZkServerAddress());
// initialize ZkClient metrics
// TODO SOLR-17458: Add Otel
// NOCOMMIT SOLR-17458: Add Otel
zkSys
.getZkMetricsProducer()
.initializeMetrics(solrMetricsContext, Attributes.empty(), "zkClient");
Expand All @@ -905,9 +908,11 @@ private void loadInternal() {
this,
zkSys.getZkController().getNodeName(),
(PublicKeyHandler) containerHandlers.get(PublicKeyHandler.PATH));
// TODO SOLR-17458: Add Otel
// NOCOMMIT SOLR-17458: AuthenticationPlugin.java
pkiAuthenticationSecurityBuilder.initializeMetrics(
solrMetricsContext, Attributes.empty(), "/authentication/pki");
solrMetricsContext,
Attributes.builder().put(HANDLER_ATTR, "/authentication/pki").build(),
"/authentication/pki");

fileStore = new DistribFileStore(this);
registerV2ApiIfEnabled(ClusterFileStore.class);
Expand Down Expand Up @@ -985,11 +990,15 @@ private void loadInternal() {
metricsHandler = new MetricsHandler(this);
containerHandlers.put(METRICS_PATH, metricsHandler);
// TODO SOLR-17458: Add Otel
metricsHandler.initializeMetrics(solrMetricsContext, Attributes.empty(), METRICS_PATH);
metricsHandler.initializeMetrics(
solrMetricsContext,
Attributes.builder().put(HANDLER_ATTR, METRICS_PATH).build(),
METRICS_PATH);

containerHandlers.put(AUTHZ_PATH, securityConfHandler);
// TODO SOLR-17458: Add Otel
securityConfHandler.initializeMetrics(solrMetricsContext, Attributes.empty(), AUTHZ_PATH);
securityConfHandler.initializeMetrics(
solrMetricsContext, Attributes.builder().put(HANDLER_ATTR, AUTHZ_PATH).build(), AUTHZ_PATH);
containerHandlers.put(AUTHC_PATH, securityConfHandler);

PluginInfo[] metricReporters = cfg.getMetricsConfig().getMetricReporters();
Expand Down Expand Up @@ -1111,7 +1120,7 @@ private void loadInternal() {
"version");

SolrFieldCacheBean fieldCacheBean = new SolrFieldCacheBean();
// TODO SOLR-17458: Otel migration
// NOCOMMIT SOLR-17458: Otel migration
fieldCacheBean.initializeMetrics(solrMetricsContext, Attributes.empty(), "");

if (isZooKeeperAware()) {
Expand Down Expand Up @@ -2449,8 +2458,9 @@ protected <T> T createHandler(String path, String handlerClass, Class<T> clazz)
}
if (handler instanceof SolrMetricProducer) {
((SolrMetricProducer) handler)
// TODO SOLR-17458: Add Otel
.initializeMetrics(solrMetricsContext, Attributes.empty(), path);
// NOCOMMIT SOLR-17458: Add Otel
.initializeMetrics(
solrMetricsContext, Attributes.builder().put(HANDLER_ATTR, path).build(), path);
}
return handler;
}
Expand Down
141 changes: 63 additions & 78 deletions solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@
import static org.apache.solr.core.RequestParams.USEPARAM;
import static org.apache.solr.response.SolrQueryResponse.haveCompleteResults;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Meter;
import com.codahale.metrics.Timer;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.LongCounter;
import io.opentelemetry.api.metrics.LongHistogram;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Collections;
Expand Down Expand Up @@ -176,7 +175,10 @@ public void initializeMetrics(
this.solrMetricsContext = parentContext.getChildContext(this);
}

metrics = new HandlerMetrics(solrMetricsContext, attributes, getCategory().toString(), scope);
metrics =
new HandlerMetrics(
solrMetricsContext,
attributes.toBuilder().put(CATEGORY_ATTR, getCategory().toString()).build());

// NOCOMMIT: I don't see value in this metric
solrMetricsContext.gauge(
Expand All @@ -194,74 +196,65 @@ public static class HandlerMetrics {
"NO_OP"),
Attributes.empty());

public final Meter numErrors;
public final Meter numServerErrors;
public final Meter numClientErrors;
public final Meter numTimeouts;
public final Counter requests;
public final Timer requestTimes;
public final Counter totalTime;

public AttributedLongCounter otelRequests;
public AttributedLongCounter otelNumServerErrors;
public AttributedLongCounter otelNumClientErrors;
public AttributedLongCounter otelNumTimeouts;
public AttributedLongTimer otelRequestTimes;

public HandlerMetrics(
SolrMetricsContext solrMetricsContext, Attributes attributes, String... metricPath) {

// NOCOMMIT SOLR-17458: To be removed
numErrors = solrMetricsContext.meter("errors", metricPath);
numServerErrors = solrMetricsContext.meter("serverErrors", metricPath);
numClientErrors = solrMetricsContext.meter("clientErrors", metricPath);
numTimeouts = solrMetricsContext.meter("timeouts", metricPath);
requests = solrMetricsContext.counter("requests", metricPath);
requestTimes = solrMetricsContext.timer("requestTimes", metricPath);
totalTime = solrMetricsContext.counter("totalTime", metricPath);

var baseRequestMetric =
solrMetricsContext.longCounter("solr_metrics_core_requests", "HTTP Solr request counts");

var baseErrorRequestMetric =
solrMetricsContext.longCounter(
"solr_metrics_core_requests_errors", "HTTP Solr request error counts");

var baseRequestTimeMetric =
solrMetricsContext.longHistogram(
"solr_metrics_core_requests_times", "HTTP Solr request times", "ms");

otelRequests =
new AttributedLongCounter(
baseRequestMetric, Attributes.builder().putAll(attributes).build());
public AttributedLongCounter requests;
public AttributedLongCounter numServerErrors;
public AttributedLongCounter numClientErrors;
public AttributedLongCounter numTimeouts;
public AttributedLongTimer requestTimes;

public HandlerMetrics(SolrMetricsContext solrMetricsContext, Attributes attributes) {

LongCounter requestMetric;
LongCounter errorRequestMetric;
LongCounter timeoutRequestMetric;
LongHistogram requestTimeMetric;

if (solrMetricsContext.getRegistryName().equals("solr.node")) {
requestMetric =
solrMetricsContext.longCounter("solr_node_requests", "Http Solr node requests");
errorRequestMetric =
solrMetricsContext.longCounter(
"solr_node_requests_errors", "HTTP Solr node request errors");
timeoutRequestMetric =
solrMetricsContext.longCounter(
"solr_node_requests_timeout", "HTTP Solr node request timeouts");
requestTimeMetric =
solrMetricsContext.longHistogram(
"solr_node_requests_times", "HTTP Solr node request times", "ms");
} else {
requestMetric =
solrMetricsContext.longCounter("solr_core_requests", "HTTP Solr core requests");
errorRequestMetric =
solrMetricsContext.longCounter(
"solr_core_requests_errors", "HTTP Solr core request errors");
timeoutRequestMetric =
solrMetricsContext.longCounter(
"solr_core_requests_timeout", "HTTP Solr core request timeouts");
requestTimeMetric =
solrMetricsContext.longHistogram(
"solr_core_requests_times", "HTTP Solr core request times", "ms");
}

otelNumServerErrors =
new AttributedLongCounter(
baseErrorRequestMetric,
Attributes.builder()
.putAll(attributes)
.put(AttributeKey.stringKey("source"), "server")
.build());
requests = new AttributedLongCounter(requestMetric, attributes);

otelNumClientErrors =
numServerErrors =
new AttributedLongCounter(
baseErrorRequestMetric,
Attributes.builder()
.putAll(attributes)
.put(AttributeKey.stringKey("source"), "client")
.build());
errorRequestMetric,
attributes.toBuilder().put(AttributeKey.stringKey("source"), "server").build());

otelNumTimeouts =
numClientErrors =
new AttributedLongCounter(
baseErrorRequestMetric,
Attributes.builder().putAll(attributes).put(TYPE_ATTR, "timeouts").build());
errorRequestMetric,
attributes.toBuilder().put(AttributeKey.stringKey("source"), "client").build());

numTimeouts = new AttributedLongCounter(timeoutRequestMetric, attributes);

otelRequestTimes = new AttributedLongTimer(baseRequestTimeMetric, attributes);
requestTimes = new AttributedLongTimer(requestTimeMetric, attributes);
// NOCOMMIT: Temporary to see metrics
otelRequests.add(0L);
otelNumTimeouts.add(0L);
otelNumClientErrors.add(0L);
otelNumServerErrors.add(0L);
requests.add(0L);
numTimeouts.add(0L);
numClientErrors.add(0L);
numServerErrors.add(0L);
}
}

Expand All @@ -288,10 +281,8 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {

HandlerMetrics metrics = getMetricsForThisRequest(req);
metrics.requests.inc();
metrics.otelRequests.inc();

Timer.Context timer = metrics.requestTimes.time();
AttributedLongTimer.MetricTimer otelTimer = metrics.otelRequestTimes.start();
AttributedLongTimer.MetricTimer timer = metrics.requestTimes.start();
try {
TestInjection.injectLeaderTragedy(req.getCore());
if (pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM))
Expand All @@ -303,8 +294,7 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
// count timeouts

if (!haveCompleteResults(rsp.getResponseHeader())) {
metrics.numTimeouts.mark();
metrics.otelNumTimeouts.inc();
metrics.numTimeouts.inc();
rsp.setHttpCaching(false);
}
} catch (QueryLimitsExceededException e) {
Expand All @@ -315,9 +305,7 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
rsp.setException(normalized);
} finally {
try {
long elapsed = timer.stop();
metrics.totalTime.inc(elapsed);
otelTimer.stop();
timer.stop();

if (publishCpuTime) {
Optional<Long> cpuTime = ThreadCpuTimer.readMSandReset(REQUEST_CPU_TIMER_CONTEXT);
Expand Down Expand Up @@ -359,15 +347,12 @@ public static void processErrorMetricsOnException(Exception e, HandlerMetrics me
}
}

metrics.numErrors.mark();
if (isClientError) {
log.error("Client exception", e);
metrics.numClientErrors.mark();
metrics.otelNumClientErrors.inc();
metrics.numClientErrors.inc();
} else {
log.error("Server exception", e);
metrics.numServerErrors.mark();
metrics.otelNumServerErrors.inc();
metrics.numServerErrors.inc();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@
/** Refer SOLR-281 */
public class SearchHandler extends RequestHandlerBase
implements SolrCoreAware, PluginInfoInitialized, PermissionNameProvider {

public static final AttributeKey<Boolean> INTERNAL_ATTR = AttributeKey.booleanKey("internal");
static final String INIT_COMPONENTS = "components";
static final String INIT_FIRST_COMPONENTS = "first-components";
static final String INIT_LAST_COMPONENTS = "last-components";
Expand Down Expand Up @@ -153,28 +155,21 @@ public void init(PluginInfo info) {
}
}

// TODO SOLR-17458: Fix metric Attributes
@Override
public void initializeMetrics(
SolrMetricsContext parentContext, Attributes attributes, String scope) {
super.initializeMetrics(
parentContext,
Attributes.builder()
.putAll(attributes)
.put(AttributeKey.stringKey("category"), getCategory().toString())
.put(AttributeKey.stringKey("internal"), "false")
.build(),
Attributes.builder().putAll(attributes).put(INTERNAL_ATTR, false).build(),
scope);
metricsShard =
new HandlerMetrics( // will register various metrics in the context
solrMetricsContext,
Attributes.builder()
.putAll(attributes)
.put(AttributeKey.stringKey("category"), getCategory().toString())
.put(AttributeKey.stringKey("internal"), "true")
.build(),
getCategory().toString(),
scope + SHARD_HANDLER_SUFFIX);
.put(CATEGORY_ATTR, getCategory().toString())
.put(INTERNAL_ATTR, true)
.build());
}

@Override
Expand Down
Loading