From 404787c51f8df4657687e9e54abf6c552add216e Mon Sep 17 00:00:00 2001 From: Nicolas Savoire Date: Fri, 14 Jul 2023 15:13:27 +0200 Subject: [PATCH] [profiling] Associate tracing span IDs with samples in the wall profiler (#3371) * Initial code for code hotspots and endpoint aggregation in wall profiler Hide functionality behind DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED and DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED flags. Profiler tracks changes of active span by subscribing to async hooks before channel and to AsyncResourceStorage enter channel that notifies when current store is changed with enterWith/run. Profiler remembers previous span and previous started spans, upon change of active span, it checks if a sample has been taken by native profiler addon, then if that's the case, it updates the context of the sample with span id and root span id computed from previous span / started spans. It also add the tags from the last web span to the context to allow computation of endpoint later on. Minor changes to other parts of the code to make it all work: === * Report error if start fails * Avoid clearing span context tags upon export Profiler may need the tags to determine span resource name for endpoint aggregation: profiler keeps a reference on web span that was active when each profiling sample is taken. Then during profile export, span tags are used to determine endpoint. Endpoint cannot be determined right away because sometimes tags necessary to determine endpoint are only set just before span is finished. * Add request tags just after span creation For web spans, add request tags just after span creation instead of when span finishes. This is needed for profiler code hotspots implementation to determine which span are web spans and should be used to find current endpoint. --- package.json | 2 +- .../src/storage/async_resource.js | 4 + packages/dd-trace/src/plugins/util/web.js | 1 + packages/dd-trace/src/profiling/config.js | 13 +- packages/dd-trace/src/profiling/profiler.js | 2 +- .../dd-trace/src/profiling/profilers/wall.js | 149 +++++++++++++++++- packages/dd-trace/src/span_processor.js | 4 - .../dd-trace/test/profiling/config.spec.js | 25 ++- .../test/profiling/profilers/wall.spec.js | 11 +- 9 files changed, 184 insertions(+), 27 deletions(-) diff --git a/package.json b/package.json index 83e8ceba087..25570e45793 100644 --- a/package.json +++ b/package.json @@ -70,7 +70,7 @@ "@datadog/native-iast-rewriter": "2.0.1", "@datadog/native-iast-taint-tracking": "^1.5.0", "@datadog/native-metrics": "^1.6.0", - "@datadog/pprof": "3.0.0", + "@datadog/pprof": "3.1.0", "@datadog/sketches-js": "^2.1.0", "@types/node": "<18.13", "@opentelemetry/api": "^1.0.0", diff --git a/packages/datadog-core/src/storage/async_resource.js b/packages/datadog-core/src/storage/async_resource.js index bf64f31126a..6dea8cf2fec 100644 --- a/packages/datadog-core/src/storage/async_resource.js +++ b/packages/datadog-core/src/storage/async_resource.js @@ -5,6 +5,7 @@ const { channel } = require('../../../diagnostics_channel') const beforeCh = channel('dd-trace:storage:before') const afterCh = channel('dd-trace:storage:after') +const enterCh = channel('dd-trace:storage:enter') let PrivateSymbol = Symbol function makePrivateSymbol () { @@ -52,6 +53,7 @@ class AsyncResourceStorage { const resource = this._executionAsyncResource() resource[this._ddResourceStore] = store + enterCh.publish() } run (store, callback, ...args) { @@ -61,11 +63,13 @@ class AsyncResourceStorage { const oldStore = resource[this._ddResourceStore] resource[this._ddResourceStore] = store + enterCh.publish() try { return callback(...args) } finally { resource[this._ddResourceStore] = oldStore + enterCh.publish() } } diff --git a/packages/dd-trace/src/plugins/util/web.js b/packages/dd-trace/src/plugins/util/web.js index 50824bcfda4..acb8a8cc92c 100644 --- a/packages/dd-trace/src/plugins/util/web.js +++ b/packages/dd-trace/src/plugins/util/web.js @@ -103,6 +103,7 @@ const web = { context.res = res this.setConfig(req, config) + addRequestTags(context) return span }, diff --git a/packages/dd-trace/src/profiling/config.js b/packages/dd-trace/src/profiling/config.js index 0f94dcbcb99..3e53c04fc6a 100644 --- a/packages/dd-trace/src/profiling/config.js +++ b/packages/dd-trace/src/profiling/config.js @@ -18,7 +18,6 @@ class Config { const { DD_PROFILING_ENABLED, DD_PROFILING_PROFILERS, - DD_PROFILING_ENDPOINT_COLLECTION_ENABLED, DD_ENV, DD_TAGS, DD_SERVICE, @@ -36,7 +35,9 @@ class Config { DD_PROFILING_EXPERIMENTAL_OOM_MONITORING_ENABLED, DD_PROFILING_EXPERIMENTAL_OOM_HEAP_LIMIT_EXTENSION_SIZE, DD_PROFILING_EXPERIMENTAL_OOM_MAX_HEAP_EXTENSION_COUNT, - DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES + DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES, + DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED, + DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED } = process.env const enabled = isTrue(coalesce(options.enabled, DD_PROFILING_ENABLED, true)) @@ -51,8 +52,8 @@ class Config { Number(DD_PROFILING_UPLOAD_TIMEOUT), 60 * 1000) const sourceMap = coalesce(options.sourceMap, DD_PROFILING_SOURCE_MAP, true) - const endpointCollection = coalesce(options.endpointCollection, - DD_PROFILING_ENDPOINT_COLLECTION_ENABLED, false) + const endpointCollectionEnabled = coalesce(options.endpointCollection, + DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED, false) const pprofPrefix = coalesce(options.pprofPrefix, DD_PROFILING_PPROF_PREFIX, '') @@ -73,7 +74,7 @@ class Config { this.uploadTimeout = uploadTimeout this.sourceMap = sourceMap this.debugSourceMaps = isTrue(coalesce(options.debugSourceMaps, DD_PROFILING_DEBUG_SOURCE_MAPS, false)) - this.endpointCollection = endpointCollection + this.endpointCollectionEnabled = endpointCollectionEnabled this.pprofPrefix = pprofPrefix const hostname = coalesce(options.hostname, DD_AGENT_HOST) || 'localhost' @@ -110,6 +111,8 @@ class Config { const profilers = options.profilers ? options.profilers : getProfilers({ DD_PROFILING_HEAP_ENABLED, DD_PROFILING_WALLTIME_ENABLED, DD_PROFILING_PROFILERS }) + this.codeHotspotsEnabled = isTrue(coalesce(options.codeHotspotsEnabled, + DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED, false)) this.profilers = ensureProfilers(profilers, this) } diff --git a/packages/dd-trace/src/profiling/profiler.js b/packages/dd-trace/src/profiling/profiler.js index 47dde4c7e57..c72fa3b6fba 100644 --- a/packages/dd-trace/src/profiling/profiler.js +++ b/packages/dd-trace/src/profiling/profiler.js @@ -23,7 +23,7 @@ class Profiler extends EventEmitter { } start (options) { - this._start(options).catch(() => {}) + this._start(options).catch((err) => { if (options.logger) options.logger.error(err) }) return this } diff --git a/packages/dd-trace/src/profiling/profilers/wall.js b/packages/dd-trace/src/profiling/profilers/wall.js index 820c035040f..c64ec7712e2 100644 --- a/packages/dd-trace/src/profiling/profilers/wall.js +++ b/packages/dd-trace/src/profiling/profilers/wall.js @@ -1,23 +1,111 @@ 'use strict' +const { storage } = require('../../../../datadog-core') + +const dc = require('../../../../diagnostics_channel') + +const beforeCh = dc.channel('dd-trace:storage:before') +const enterCh = dc.channel('dd-trace:storage:enter') + +let kSampleCount + +function getActiveSpan () { + const store = storage.getStore() + return store && store.span +} + +function getStartedSpans (context) { + return context._trace.started +} + +function generateLabels ({ spanId, rootSpanId, webTags, endpoint }) { + const labels = {} + if (spanId) { + labels['span id'] = spanId + } + if (rootSpanId) { + labels['local root span id'] = rootSpanId + } + if (webTags && Object.keys(webTags).length !== 0) { + labels['trace endpoint'] = endpointNameFromTags(webTags) + } else if (endpoint) { + // fallback to endpoint computed when sample was taken + labels['trace endpoint'] = endpoint + } + + return labels +} + +function getSpanContextTags (span) { + return span.context()._tags +} + +function isWebServerSpan (tags) { + return tags['span.type'] === 'web' +} + +function endpointNameFromTags (tags) { + return tags['resource.name'] || [ + tags['http.method'], + tags['http.route'] + ].filter(v => v).join(' ') +} + +function updateContext (context, span, startedSpans, endpointCollectionEnabled) { + context.spanId = span.context().toSpanId() + const rootSpan = startedSpans[0] + if (rootSpan) { + context.rootSpanId = rootSpan.context().toSpanId() + if (endpointCollectionEnabled) { + // Find the first webspan starting from the end: + // There might be several webspans, for example with next.js, http plugin creates a first span + // and then next.js plugin creates a child span, and this child span haves the correct endpoint information. + for (let i = startedSpans.length - 1; i >= 0; i--) { + const tags = getSpanContextTags(startedSpans[i]) + if (isWebServerSpan(tags)) { + context.webTags = tags + // endpoint may not be determined yet, but keep it as fallback + // if tags are not available anymore during serialization + context.endpoint = endpointNameFromTags(tags) + break + } + } + } + } +} + class NativeWallProfiler { constructor (options = {}) { this.type = 'wall' this._samplingIntervalMicros = options.samplingInterval || 1e6 / 99 // 99hz this._flushIntervalMillis = options.flushInterval || 60 * 1e3 // 60 seconds this._codeHotspotsEnabled = !!options.codeHotspotsEnabled + this._endpointCollectionEnabled = !!options.endpointCollectionEnabled this._mapper = undefined this._pprof = undefined + // Bind to this so the same value can be used to unsubscribe later + this._enter = this._enter.bind(this) this._logger = options.logger this._started = false } + codeHotspotsEnabled () { + return this._codeHotspotsEnabled + } + start ({ mapper } = {}) { if (this._started) return + if (this._codeHotspotsEnabled && !this._emittedFFMessage && this._logger) { + this._logger.debug( + `Wall profiler: Enable config_trace_show_breakdown_profiling_for_node feature flag to see code hotspots.`) + this._emittedFFMessage = true + } + this._mapper = mapper this._pprof = require('@datadog/pprof') + kSampleCount = this._pprof.time.constants.kSampleCount // pprof otherwise crashes in worker threads if (!process._startProfilerIdleNotifier) { @@ -31,16 +119,63 @@ class NativeWallProfiler { intervalMicros: this._samplingIntervalMicros, durationMillis: this._flushIntervalMillis, sourceMapper: this._mapper, - customLabels: this._codeHotspotsEnabled, + withContexts: this._codeHotspotsEnabled, lineNumbers: false }) + if (this._codeHotspotsEnabled) { + this._profilerState = this._pprof.time.getState() + this._currentContext = {} + this._pprof.time.setContext(this._currentContext) + this._profilerState = undefined + this._lastSpan = undefined + this._lastStartedSpans = undefined + this._lastSampleCount = 0 + + beforeCh.subscribe(this._enter) + enterCh.subscribe(this._enter) + } + this._started = true } - profile () { + _enter () { if (!this._started) return - return this._pprof.time.stop(true) + + const sampleCount = this._profilerState[kSampleCount] + if (sampleCount !== this._lastSampleCount) { + this._lastSampleCount = sampleCount + const context = this._currentContext + this._currentContext = {} + this._pprof.time.setContext(this._currentContext) + + if (this._lastSpan) { + updateContext(context, this._lastSpan, this._lastStartedSpans, this._endpointCollectionEnabled) + } + } + + const span = getActiveSpan() + if (span) { + this._lastSpan = span + this._lastStartedSpans = getStartedSpans(span.context()) + } else { + this._lastStartedSpans = undefined + this._lastSpan = undefined + } + } + + _stop (restart) { + if (!this._started) return + if (this._codeHotspotsEnabled) { + // update last sample context if needed + this._enter() + this._lastSampleCount = 0 + } + return this._pprof.time.stop(restart, this._codeHotspotsEnabled ? generateLabels : undefined) + } + + profile () { + return this._stop(true) } encode (profile) { @@ -50,7 +185,13 @@ class NativeWallProfiler { stop () { if (!this._started) return - const profile = this._pprof.time.stop() + const profile = this._stop(false) + if (this._codeHotspotsEnabled) { + beforeCh.unsubscribe(this._enter) + enterCh.subscribe(this._enter) + this._profilerState = undefined + } + this._started = false return profile } diff --git a/packages/dd-trace/src/span_processor.js b/packages/dd-trace/src/span_processor.js index aea348b11fb..c6e8c300529 100644 --- a/packages/dd-trace/src/span_processor.js +++ b/packages/dd-trace/src/span_processor.js @@ -138,10 +138,6 @@ class SpanProcessor { } } - for (const span of trace.finished) { - span.context()._tags = {} - } - trace.started = active trace.finished = [] } diff --git a/packages/dd-trace/test/profiling/config.spec.js b/packages/dd-trace/test/profiling/config.spec.js index b49a20c6f16..717ccf0aae9 100644 --- a/packages/dd-trace/test/profiling/config.spec.js +++ b/packages/dd-trace/test/profiling/config.spec.js @@ -42,6 +42,7 @@ describe('config', () => { expect(config.logger).to.be.an.instanceof(ConsoleLogger) expect(config.exporters[0]).to.be.an.instanceof(AgentExporter) expect(config.profilers[0]).to.be.an.instanceof(WallProfiler) + expect(config.profilers[0].codeHotspotsEnabled()).false expect(config.profilers[1]).to.be.an.instanceof(SpaceProfiler) }) @@ -57,8 +58,9 @@ describe('config', () => { error () { } }, exporters: 'agent,file', - profilers: 'wall', - url: 'http://localhost:1234/' + profilers: 'space,wall', + url: 'http://localhost:1234/', + codeHotspotsEnabled: true } const config = new Config(options) @@ -78,8 +80,10 @@ describe('config', () => { expect(config.exporters[0]._url.toString()).to.equal(options.url) expect(config.exporters[1]).to.be.an.instanceof(FileExporter) expect(config.profilers).to.be.an('array') - expect(config.profilers.length).to.equal(1) - expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler) + expect(config.profilers.length).to.equal(2) + expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler) + expect(config.profilers[1]).to.be.an.instanceOf(WallProfiler) + expect(config.profilers[1].codeHotspotsEnabled()).true }) it('should filter out invalid profilers', () => { @@ -127,7 +131,8 @@ describe('config', () => { it('should support profiler config with DD_PROFILING_PROFILERS', () => { process.env = { - DD_PROFILING_PROFILERS: 'wall' + DD_PROFILING_PROFILERS: 'wall', + DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1' } const options = { logger: { @@ -143,6 +148,7 @@ describe('config', () => { expect(config.profilers).to.be.an('array') expect(config.profilers.length).to.equal(1) expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler) + expect(config.profilers[0].codeHotspotsEnabled()).true }) it('should support profiler config with DD_PROFILING_XXX_ENABLED', () => { @@ -190,7 +196,8 @@ describe('config', () => { it('should prioritize options over env variables', () => { process.env = { - DD_PROFILING_PROFILERS: 'wall' + DD_PROFILING_PROFILERS: 'space', + DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1' } const options = { logger: { @@ -199,14 +206,16 @@ describe('config', () => { warn () {}, error () {} }, - profilers: ['space'] + profilers: ['wall'], + codeHotspotsEnabled: false } const config = new Config(options) expect(config.profilers).to.be.an('array') expect(config.profilers.length).to.equal(1) - expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler) + expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler) + expect(config.profilers[0].codeHotspotsEnabled()).false }) it('should support tags', () => { diff --git a/packages/dd-trace/test/profiling/profilers/wall.spec.js b/packages/dd-trace/test/profiling/profilers/wall.spec.js index 4b6d3731cd9..35c04eae21c 100644 --- a/packages/dd-trace/test/profiling/profilers/wall.spec.js +++ b/packages/dd-trace/test/profiling/profilers/wall.spec.js @@ -15,7 +15,10 @@ describe('profilers/native/wall', () => { encode: sinon.stub().returns(Promise.resolve()), time: { start: sinon.stub(), - stop: sinon.stub().returns('profile') + stop: sinon.stub().returns('profile'), + constants: { + kSampleCount: 0 + } } } @@ -48,7 +51,7 @@ describe('profilers/native/wall', () => { { intervalMicros: 1e6 / 99, durationMillis: 60000, sourceMapper: undefined, - customLabels: false, + withContexts: false, lineNumbers: false }) }) @@ -63,7 +66,7 @@ describe('profilers/native/wall', () => { { intervalMicros: 500, durationMillis: 60000, sourceMapper: undefined, - customLabels: false, + withContexts: false, lineNumbers: false }) }) @@ -134,7 +137,7 @@ describe('profilers/native/wall', () => { { intervalMicros: 1e6 / 99, durationMillis: 60000, sourceMapper: mapper, - customLabels: false, + withContexts: false, lineNumbers: false }) }) })