Skip to content

add event loop and gc configuration for runtime metrics #5913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions packages/dd-trace/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,12 @@ class Config {
}
}

if (typeof options.runtimeMetrics === 'boolean') {
options.runtimeMetrics = {
enabled: options.runtimeMetrics
}
}

const DD_INSTRUMENTATION_INSTALL_ID = coalesce(
getEnvironmentVariable('DD_INSTRUMENTATION_INSTALL_ID'),
null
Expand Down Expand Up @@ -567,7 +573,10 @@ class Config {
defaults['remoteConfig.enabled'] = true
defaults['remoteConfig.pollInterval'] = 5 // seconds
defaults.reportHostname = false
defaults.runtimeMetrics = false
defaults['runtimeMetrics.enabled'] = false
defaults['runtimeMetrics.eventLoop'] = true
defaults['runtimeMetrics.gc'] = true
defaults['runtimeMetrics.gcCollector'] = 'default'
Comment on lines +578 to +579
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to have these as two separate config properties? You could have just one (just keep .gc?) and introduce an 'off' value (and maybe use 'on' instead of 'default' for consistency). You could still have config translate incoming env var values of '0', '1', 'true', 'false' to off and on. Of course, for consistency you should then probably allow use of off and on as keywords for the other two properties as well (enabled and eventLoop). Maybe we should just add on and off as recognized values in util.js.

We recently added a config for choosing the compression method for uploading profiles and use on/off there in addition to specifically naming compression methods.

Of course, if you envision there could be even more config options for GC metrics in the future, then it may make sense to keep them as separate properties, but we should assess how likely that is.

defaults.runtimeMetricsRuntimeId = false
defaults.sampleRate = undefined
defaults['sampler.rateLimit'] = 100
Expand Down Expand Up @@ -644,7 +653,7 @@ class Config {
this._setBoolean(obj, 'logInjection', DD_LOGS_INJECTION)
const profilingEnabled = normalizeProfilingEnabledValue(DD_PROFILING_ENABLED)
this._setString(obj, 'profiling.enabled', profilingEnabled)
this._setBoolean(obj, 'runtimeMetrics', DD_RUNTIME_METRICS_ENABLED)
this._setBoolean(obj, 'runtimeMetrics.enabled', DD_RUNTIME_METRICS_ENABLED)
this._setString(obj, 'service', DD_SERVICE)
this._setString(obj, 'version', DD_VERSION)
}
Expand Down Expand Up @@ -722,6 +731,9 @@ class Config {
DD_REMOTE_CONFIGURATION_ENABLED,
DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS,
DD_RUNTIME_METRICS_ENABLED,
DD_RUNTIME_METRICS_EVENT_LOOP,
DD_RUNTIME_METRICS_GC,
DD_RUNTIME_METRICS_GC_COLLECTOR,
DD_SERVICE,
DD_SERVICE_MAPPING,
DD_SITE,
Expand Down Expand Up @@ -943,8 +955,11 @@ class Config {
const otelSetRuntimeMetrics = String(OTEL_METRICS_EXPORTER).toLowerCase() === 'none'
? false
: undefined
this._setBoolean(env, 'runtimeMetrics', DD_RUNTIME_METRICS_ENABLED ||
this._setBoolean(env, 'runtimeMetrics.enabled', DD_RUNTIME_METRICS_ENABLED ||
otelSetRuntimeMetrics)
this._setBoolean(env, 'runtimeMetrics.eventLoop', DD_RUNTIME_METRICS_EVENT_LOOP)
this._setBoolean(env, 'runtimeMetrics.gc', DD_RUNTIME_METRICS_GC)
this._setString(env, 'runtimeMetrics.gcCollector', DD_RUNTIME_METRICS_GC_COLLECTOR)
this._setBoolean(env, 'runtimeMetricsRuntimeId', DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED)
this._setArray(env, 'sampler.spanSamplingRules', reformatSpanSamplingRules(coalesce(
safeJsonParse(maybeFile(DD_SPAN_SAMPLING_RULES_FILE)),
Expand Down Expand Up @@ -1140,7 +1155,10 @@ class Config {
this._optsUnprocessed['remoteConfig.pollInterval'] = options.remoteConfig.pollInterval
}
this._setBoolean(opts, 'reportHostname', options.reportHostname)
this._setBoolean(opts, 'runtimeMetrics', options.runtimeMetrics)
this._setBoolean(opts, 'runtimeMetrics.enabled', options.runtimeMetrics?.enabled)
this._setBoolean(opts, 'runtimeMetrics.eventLoop', options.runtimeMetrics?.eventLoop)
this._setBoolean(opts, 'runtimeMetrics.gc', options.runtimeMetrics?.gc)
this._setString(opts, 'runtimeMetrics.gcCollector', options.runtimeMetrics?.gcCollector)
this._setBoolean(opts, 'runtimeMetricsRuntimeId', options.runtimeMetricsRuntimeId)
this._setArray(opts, 'sampler.spanSamplingRules', reformatSpanSamplingRules(options.spanSamplingRules))
this._setUnit(opts, 'sampleRate', coalesce(options.sampleRate, options.ingestion.sampleRate))
Expand Down
2 changes: 1 addition & 1 deletion packages/dd-trace/src/proxy.js
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ class Tracer extends NoopProxy {
}
}

if (config.runtimeMetrics) {
if (config.runtimeMetrics.enabled) {
runtimeMetrics.start(config)
}

Expand Down
2 changes: 1 addition & 1 deletion packages/dd-trace/src/runtime_metrics/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const noop = runtimeMetrics = {

module.exports = {
start (config) {
if (!config?.runtimeMetrics) return
if (!config?.runtimeMetrics.enabled) return

runtimeMetrics = require('./runtime_metrics')

Expand Down
29 changes: 19 additions & 10 deletions packages/dd-trace/src/runtime_metrics/runtime_metrics.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,27 @@ reset()
const runtimeMetrics = module.exports = {
start (config) {
const clientConfig = DogStatsDClient.generateClientConfig(config)
const watchers = []

try {
nativeMetrics = require('@datadog/native-metrics')
if (config.runtimeMetrics.gc !== false) {
const gcCollector = config.runtimeMetrics.gcCollector || 'default'

if (hasGCObserver) {
nativeMetrics.start('loop') // Only add event loop watcher and not GC.
if (hasGCProfiler && (gcCollector === 'profiler' || gcCollector === 'default')) {
startGCProfiler()
} else if (hasGCObserver && (gcCollector === 'observer' || gcCollector === 'default')) {
startGCObserver()
} else {
nativeMetrics.start()
watchers.push('gc')
}
}

if (config.runtimeMetrics.eventLoop !== false) {
watchers.push('loop')
}

try {
nativeMetrics = require('@datadog/native-metrics')
nativeMetrics.start(...watchers)
} catch (e) {
log.error('Error starting native metrics', e)
nativeMetrics = null
Expand All @@ -52,9 +64,6 @@ const runtimeMetrics = module.exports = {

time = process.hrtime()

startGCObserver()
startGCProfiler()

if (nativeMetrics) {
interval = setInterval(() => {
captureCommonMetrics()
Expand Down Expand Up @@ -293,7 +302,7 @@ function histogram (name, stats, tag) {
}

function startGCObserver () {
if (gcObserver || hasGCProfiler || !hasGCObserver) return
if (gcObserver) return

gcObserver = new PerformanceObserver(list => {
for (const entry of list.getEntries()) {
Expand All @@ -308,7 +317,7 @@ function startGCObserver () {
}

function startGCProfiler () {
if (gcProfiler || !hasGCProfiler) return
if (gcProfiler) return

gcProfiler = new v8.GCProfiler()
gcProfiler.start()
Expand Down
3 changes: 3 additions & 0 deletions packages/dd-trace/src/supported-configurations.json
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@
"DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS": ["A"],
"DD_REMOTE_CONFIGURATION_ENABLED": ["A"],
"DD_RUNTIME_METRICS_ENABLED": ["A"],
"DD_RUNTIME_METRICS_EVENT_LOOP": ["A"],
"DD_RUNTIME_METRICS_GC": ["A"],
"DD_RUNTIME_METRICS_GC_COLLECTOR": ["A"],
"DD_RUNTIME_METRICS_FLUSH_INTERVAL": ["A"],
"DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED": ["A"],
"DD_SERVICE_MAPPING": ["A"],
Expand Down
3 changes: 2 additions & 1 deletion packages/dd-trace/src/telemetry/telemetry.js
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ const nameMapping = {
traceId128BitLoggingEnabled: 'DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED',
instrumentationSource: 'instrumentation_source',
injectionEnabled: 'ssi_injection_enabled',
injectForce: 'ssi_forced_injection_enabled'
injectForce: 'ssi_forced_injection_enabled',
'runtimeMetrics.enabled': 'runtimeMetrics'
}

const namesNeedFormatting = new Set(['DD_TAGS', 'peerServiceMapping', 'serviceMapping'])
Expand Down
43 changes: 30 additions & 13 deletions packages/dd-trace/test/config.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ describe('Config', () => {
expect(config).to.have.property('service', 'service')
expect(config).to.have.property('logLevel', 'error')
expect(config).to.have.property('sampleRate', 0.5)
expect(config).to.have.property('runtimeMetrics', true)
expect(config).to.have.nested.property('runtimeMetrics.enabled', true)
expect(config.tags).to.include({ foo: 'bar', baz: 'qux' })
expect(config).to.have.nested.deep.property('tracePropagationStyle.inject', ['b3', 'tracecontext'])
expect(config).to.have.nested.deep.property('tracePropagationStyle.extract', ['b3', 'tracecontext'])
Expand Down Expand Up @@ -209,7 +209,7 @@ describe('Config', () => {
expect(config).to.have.property('service', 'otel_service')
expect(config).to.have.property('logLevel', 'debug')
expect(config).to.have.property('sampleRate', 0.1)
expect(config).to.have.property('runtimeMetrics', false)
expect(config).to.have.nested.property('runtimeMetrics.enabled', false)
expect(config.tags).to.include({ foo: 'bar1', baz: 'qux1' })
expect(config).to.have.nested.deep.property('tracePropagationStyle.inject', ['b3', 'datadog'])
expect(config).to.have.nested.deep.property('tracePropagationStyle.extract', ['b3', 'datadog'])
Expand Down Expand Up @@ -327,7 +327,10 @@ describe('Config', () => {
expect(config).to.have.nested.property('remoteConfig.enabled', true)
expect(config).to.have.nested.property('remoteConfig.pollInterval', 5)
expect(config).to.have.property('reportHostname', false)
expect(config).to.have.property('runtimeMetrics', false)
expect(config).to.have.nested.property('runtimeMetrics.enabled', false)
expect(config).to.have.nested.property('runtimeMetrics.eventLoop', true)
expect(config).to.have.nested.property('runtimeMetrics.gc', true)
expect(config).to.have.nested.property('runtimeMetrics.gcCollector', 'default')
expect(config).to.have.property('runtimeMetricsRuntimeId', false)
expect(config).to.have.property('sampleRate', undefined)
expect(config).to.have.property('scope', undefined)
Expand Down Expand Up @@ -449,7 +452,7 @@ describe('Config', () => {
{ name: 'remoteConfig.pollInterval', value: 5, origin: 'default' },
{ name: 'reportHostname', value: false, origin: 'default' },
{ name: 'reportHostname', value: false, origin: 'default' },
{ name: 'runtimeMetrics', value: false, origin: 'default' },
{ name: 'runtimeMetrics.enabled', value: false, origin: 'default' },
{ name: 'runtimeMetricsRuntimeId', value: false, origin: 'default' },
{ name: 'sampleRate', value: undefined, origin: 'default' },
{ name: 'sampler.rateLimit', value: 100, origin: 'default' },
Expand Down Expand Up @@ -578,6 +581,9 @@ describe('Config', () => {
process.env.DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS = '42'
process.env.DD_REMOTE_CONFIGURATION_ENABLED = 'false'
process.env.DD_RUNTIME_METRICS_ENABLED = 'true'
process.env.DD_RUNTIME_METRICS_EVENT_LOOP = 'false'
process.env.DD_RUNTIME_METRICS_GC = 'false'
process.env.DD_RUNTIME_METRICS_GC_COLLECTOR = 'native'
process.env.DD_RUNTIME_METRICS_RUNTIME_ID_ENABLED = 'true'
process.env.DD_SERVICE = 'service'
process.env.DD_SERVICE_MAPPING = 'a:aa, b:bb'
Expand Down Expand Up @@ -693,7 +699,10 @@ describe('Config', () => {
expect(config).to.have.nested.property('remoteConfig.enabled', false)
expect(config).to.have.nested.property('remoteConfig.pollInterval', 42)
expect(config).to.have.property('reportHostname', true)
expect(config).to.have.property('runtimeMetrics', true)
expect(config).to.have.nested.property('runtimeMetrics.enabled', true)
expect(config).to.have.nested.property('runtimeMetrics.eventLoop', false)
expect(config).to.have.nested.property('runtimeMetrics.gc', false)
expect(config).to.have.nested.property('runtimeMetrics.gcCollector', 'native')
expect(config).to.have.property('runtimeMetricsRuntimeId', true)
expect(config).to.have.property('sampleRate', 0.5)
expect(config).to.have.deep.nested.property('sampler', {
Expand Down Expand Up @@ -798,7 +807,7 @@ describe('Config', () => {
{ name: 'remoteConfig.enabled', value: false, origin: 'env_var' },
{ name: 'remoteConfig.pollInterval', value: '42', origin: 'env_var' },
{ name: 'reportHostname', value: true, origin: 'env_var' },
{ name: 'runtimeMetrics', value: true, origin: 'env_var' },
{ name: 'runtimeMetrics.enabled', value: true, origin: 'env_var' },
{ name: 'runtimeMetricsRuntimeId', value: true, origin: 'env_var' },
{ name: 'sampler.rateLimit', value: '-1', origin: 'env_var' },
{ name: 'sampler.rules', value: process.env.DD_TRACE_SAMPLING_RULES, origin: 'env_var' },
Expand Down Expand Up @@ -880,7 +889,7 @@ describe('Config', () => {

expect(config).to.have.property('tracing', false)
expect(config).to.have.property('tracePropagationExtractFirst', true)
expect(config).to.have.property('runtimeMetrics', false)
expect(config).to.have.nested.property('runtimeMetrics.enabled', false)
})

it('should initialize from environment variables with url taking precedence', () => {
Expand Down Expand Up @@ -1022,7 +1031,12 @@ describe('Config', () => {
pollInterval: 42
},
reportHostname: true,
runtimeMetrics: true,
runtimeMetrics: {
enabled: true,
eventLoop: false,
gc: false,
gcCollector: 'native'
},
runtimeMetricsRuntimeId: true,
sampleRate: 0.5,
samplingRules,
Expand Down Expand Up @@ -1092,7 +1106,10 @@ describe('Config', () => {
expect(config).to.have.property('protocolVersion', '0.5')
expect(config).to.have.nested.property('remoteConfig.pollInterval', 42)
expect(config).to.have.property('reportHostname', true)
expect(config).to.have.property('runtimeMetrics', true)
expect(config).to.have.nested.property('runtimeMetrics.enabled', true)
expect(config).to.have.nested.property('runtimeMetrics.eventLoop', false)
expect(config).to.have.nested.property('runtimeMetrics.gc', false)
expect(config).to.have.nested.property('runtimeMetrics.gcCollector', 'native')
expect(config).to.have.property('runtimeMetricsRuntimeId', true)
expect(config).to.have.property('sampleRate', 0.5)
expect(config).to.have.deep.nested.property('sampler', {
Expand Down Expand Up @@ -1174,7 +1191,7 @@ describe('Config', () => {
{ name: 'protocolVersion', value: '0.5', origin: 'code' },
{ name: 'remoteConfig.pollInterval', value: 42, origin: 'code' },
{ name: 'reportHostname', value: true, origin: 'code' },
{ name: 'runtimeMetrics', value: true, origin: 'code' },
{ name: 'runtimeMetrics.enabled', value: true, origin: 'code' },
{ name: 'runtimeMetricsRuntimeId', value: true, origin: 'code' },
{ name: 'sampler.rateLimit', value: 1000, origin: 'code' },
{ name: 'sampler.rules', value: samplingRules, origin: 'code' },
Expand Down Expand Up @@ -1564,7 +1581,7 @@ describe('Config', () => {
expect(config).to.have.property('protocolVersion', '0.5')
expect(config).to.have.nested.property('remoteConfig.pollInterval', 42)
expect(config).to.have.property('reportHostname', false)
expect(config).to.have.property('runtimeMetrics', false)
expect(config).to.have.nested.property('runtimeMetrics.enabled', false)
expect(config).to.have.property('runtimeMetricsRuntimeId', false)
expect(config).to.have.property('service', 'test')
expect(config).to.have.deep.property('serviceMapping', { b: 'bb' })
Expand Down Expand Up @@ -2633,7 +2650,7 @@ apm_configuration_default:
DD_RUNTIME_METRICS_ENABLED: true
`)
const config = new Config()
expect(config).to.have.property('runtimeMetrics', true)
expect(config).to.have.nested.property('runtimeMetrics.enabled', true)
})

it('should apply service specific config', () => {
Expand Down Expand Up @@ -2728,7 +2745,7 @@ apm_configuration_default:
expect(stableConfig.warnings).to.have.lengthOf(0)

const config = new Config()
expect(config).to.have.property('runtimeMetrics', true)
expect(config).to.have.nested.property('runtimeMetrics.enabled', true)
})

it('should log a warning if the YAML files are malformed', () => {
Expand Down
5 changes: 4 additions & 1 deletion packages/dd-trace/test/proxy.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ describe('TracerProxy', () => {
remoteConfig: {
enabled: true
},
runtimeMetrics: {
enabled: false
},
configure: sinon.spy(),
llmobs: {}
}
Expand Down Expand Up @@ -386,7 +389,7 @@ describe('TracerProxy', () => {
})

it('should start capturing runtimeMetrics when configured', () => {
config.runtimeMetrics = true
config.runtimeMetrics.enabled = true

proxy.init()

Expand Down
21 changes: 17 additions & 4 deletions packages/dd-trace/test/runtime_metrics.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,15 @@ const suiteDescribe = isWindows ? describe.skip : describe
suiteDescribe('runtimeMetrics (proxy)', () => {
let runtimeMetrics
let proxy
let config

beforeEach(() => {
config = {
runtimeMetrics: {
enabled: false
}
}

runtimeMetrics = sinon.spy({
start () {},
stop () {},
Expand Down Expand Up @@ -54,7 +61,7 @@ suiteDescribe('runtimeMetrics (proxy)', () => {
})

it('should proxy when enabled', () => {
const config = { runtimeMetrics: true }
config.runtimeMetrics.enabled = true

proxy.start(config)
proxy.track()
Expand All @@ -78,11 +85,11 @@ suiteDescribe('runtimeMetrics (proxy)', () => {
})

it('should be noop when disabled after being enabled', () => {
const config = { runtimeMetrics: true }

config.runtimeMetrics.enabled = true
proxy.start(config)
proxy.stop()
proxy.start()
config.runtimeMetrics.enabled = false
proxy.start(config)
proxy.track()
proxy.boolean()
proxy.histogram()
Expand Down Expand Up @@ -153,6 +160,12 @@ suiteDescribe('runtimeMetrics', () => {
hostname: 'localhost',
port: 8125
},
runtimeMetrics: {
enabled: true,
eventLoop: true,
gc: true,
gcCollector: 'default'
},
tags: {
str: 'bar',
obj: {},
Expand Down