Skip to content

Commit

Permalink
[profiling] Associate tracing span IDs with samples in the wall profi…
Browse files Browse the repository at this point in the history
…ler (#3371)

* Initial code for code hotspots and endpoint aggregation in wall profiler

Hide functionality behind DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED
and DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED flags.

Profiler tracks changes of active span by subscribing to async hooks
before channel and to AsyncResourceStorage enter channel that
notifies when current store is changed with enterWith/run.
Profiler remembers previous span and previous started spans, upon change
of active span, it checks if a sample has been taken by native profiler
addon, then if that's the case, it updates the context of the sample
with span id and root span id computed from previous span / started spans.
It also add the tags from the last web span to the context to allow
computation of endpoint later on.

Minor changes to other parts of the code to make it all work:
===

* Report error if start fails

* Avoid clearing span context tags upon export

Profiler may need the tags to determine span resource name for endpoint
aggregation: profiler keeps a reference on web span that was active when
each profiling sample is taken. Then during profile export, span tags
are used to determine endpoint. Endpoint cannot be determined right away
because sometimes tags necessary to determine endpoint are only set just
before span is finished.

* Add request tags just after span creation

For web spans, add request tags just after span creation instead of when
span finishes. This is needed for profiler code hotspots implementation
to determine which span are web spans and should be used to find current
endpoint.
  • Loading branch information
nsavoire authored and tlhunter committed Jul 21, 2023
1 parent 4b02e9e commit 404787c
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 27 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
"@datadog/native-iast-rewriter": "2.0.1",
"@datadog/native-iast-taint-tracking": "^1.5.0",
"@datadog/native-metrics": "^1.6.0",
"@datadog/pprof": "3.0.0",
"@datadog/pprof": "3.1.0",
"@datadog/sketches-js": "^2.1.0",
"@types/node": "<18.13",
"@opentelemetry/api": "^1.0.0",
Expand Down
4 changes: 4 additions & 0 deletions packages/datadog-core/src/storage/async_resource.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const { channel } = require('../../../diagnostics_channel')

const beforeCh = channel('dd-trace:storage:before')
const afterCh = channel('dd-trace:storage:after')
const enterCh = channel('dd-trace:storage:enter')

let PrivateSymbol = Symbol
function makePrivateSymbol () {
Expand Down Expand Up @@ -52,6 +53,7 @@ class AsyncResourceStorage {
const resource = this._executionAsyncResource()

resource[this._ddResourceStore] = store
enterCh.publish()
}

run (store, callback, ...args) {
Expand All @@ -61,11 +63,13 @@ class AsyncResourceStorage {
const oldStore = resource[this._ddResourceStore]

resource[this._ddResourceStore] = store
enterCh.publish()

try {
return callback(...args)
} finally {
resource[this._ddResourceStore] = oldStore
enterCh.publish()
}
}

Expand Down
1 change: 1 addition & 0 deletions packages/dd-trace/src/plugins/util/web.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ const web = {
context.res = res

this.setConfig(req, config)
addRequestTags(context)

return span
},
Expand Down
13 changes: 8 additions & 5 deletions packages/dd-trace/src/profiling/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ class Config {
const {
DD_PROFILING_ENABLED,
DD_PROFILING_PROFILERS,
DD_PROFILING_ENDPOINT_COLLECTION_ENABLED,
DD_ENV,
DD_TAGS,
DD_SERVICE,
Expand All @@ -36,7 +35,9 @@ class Config {
DD_PROFILING_EXPERIMENTAL_OOM_MONITORING_ENABLED,
DD_PROFILING_EXPERIMENTAL_OOM_HEAP_LIMIT_EXTENSION_SIZE,
DD_PROFILING_EXPERIMENTAL_OOM_MAX_HEAP_EXTENSION_COUNT,
DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES
DD_PROFILING_EXPERIMENTAL_OOM_EXPORT_STRATEGIES,
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED,
DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED
} = process.env

const enabled = isTrue(coalesce(options.enabled, DD_PROFILING_ENABLED, true))
Expand All @@ -51,8 +52,8 @@ class Config {
Number(DD_PROFILING_UPLOAD_TIMEOUT), 60 * 1000)
const sourceMap = coalesce(options.sourceMap,
DD_PROFILING_SOURCE_MAP, true)
const endpointCollection = coalesce(options.endpointCollection,
DD_PROFILING_ENDPOINT_COLLECTION_ENABLED, false)
const endpointCollectionEnabled = coalesce(options.endpointCollection,
DD_PROFILING_EXPERIMENTAL_ENDPOINT_COLLECTION_ENABLED, false)
const pprofPrefix = coalesce(options.pprofPrefix,
DD_PROFILING_PPROF_PREFIX, '')

Expand All @@ -73,7 +74,7 @@ class Config {
this.uploadTimeout = uploadTimeout
this.sourceMap = sourceMap
this.debugSourceMaps = isTrue(coalesce(options.debugSourceMaps, DD_PROFILING_DEBUG_SOURCE_MAPS, false))
this.endpointCollection = endpointCollection
this.endpointCollectionEnabled = endpointCollectionEnabled
this.pprofPrefix = pprofPrefix

const hostname = coalesce(options.hostname, DD_AGENT_HOST) || 'localhost'
Expand Down Expand Up @@ -110,6 +111,8 @@ class Config {
const profilers = options.profilers
? options.profilers
: getProfilers({ DD_PROFILING_HEAP_ENABLED, DD_PROFILING_WALLTIME_ENABLED, DD_PROFILING_PROFILERS })
this.codeHotspotsEnabled = isTrue(coalesce(options.codeHotspotsEnabled,
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED, false))

this.profilers = ensureProfilers(profilers, this)
}
Expand Down
2 changes: 1 addition & 1 deletion packages/dd-trace/src/profiling/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Profiler extends EventEmitter {
}

start (options) {
this._start(options).catch(() => {})
this._start(options).catch((err) => { if (options.logger) options.logger.error(err) })
return this
}

Expand Down
149 changes: 145 additions & 4 deletions packages/dd-trace/src/profiling/profilers/wall.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,111 @@
'use strict'

const { storage } = require('../../../../datadog-core')

const dc = require('../../../../diagnostics_channel')

const beforeCh = dc.channel('dd-trace:storage:before')
const enterCh = dc.channel('dd-trace:storage:enter')

let kSampleCount

function getActiveSpan () {
const store = storage.getStore()
return store && store.span
}

function getStartedSpans (context) {
return context._trace.started
}

function generateLabels ({ spanId, rootSpanId, webTags, endpoint }) {
const labels = {}
if (spanId) {
labels['span id'] = spanId
}
if (rootSpanId) {
labels['local root span id'] = rootSpanId
}
if (webTags && Object.keys(webTags).length !== 0) {
labels['trace endpoint'] = endpointNameFromTags(webTags)
} else if (endpoint) {
// fallback to endpoint computed when sample was taken
labels['trace endpoint'] = endpoint
}

return labels
}

function getSpanContextTags (span) {
return span.context()._tags
}

function isWebServerSpan (tags) {
return tags['span.type'] === 'web'
}

function endpointNameFromTags (tags) {
return tags['resource.name'] || [
tags['http.method'],
tags['http.route']
].filter(v => v).join(' ')
}

function updateContext (context, span, startedSpans, endpointCollectionEnabled) {
context.spanId = span.context().toSpanId()
const rootSpan = startedSpans[0]
if (rootSpan) {
context.rootSpanId = rootSpan.context().toSpanId()
if (endpointCollectionEnabled) {
// Find the first webspan starting from the end:
// There might be several webspans, for example with next.js, http plugin creates a first span
// and then next.js plugin creates a child span, and this child span haves the correct endpoint information.
for (let i = startedSpans.length - 1; i >= 0; i--) {
const tags = getSpanContextTags(startedSpans[i])
if (isWebServerSpan(tags)) {
context.webTags = tags
// endpoint may not be determined yet, but keep it as fallback
// if tags are not available anymore during serialization
context.endpoint = endpointNameFromTags(tags)
break
}
}
}
}
}

class NativeWallProfiler {
constructor (options = {}) {
this.type = 'wall'
this._samplingIntervalMicros = options.samplingInterval || 1e6 / 99 // 99hz
this._flushIntervalMillis = options.flushInterval || 60 * 1e3 // 60 seconds
this._codeHotspotsEnabled = !!options.codeHotspotsEnabled
this._endpointCollectionEnabled = !!options.endpointCollectionEnabled
this._mapper = undefined
this._pprof = undefined

// Bind to this so the same value can be used to unsubscribe later
this._enter = this._enter.bind(this)
this._logger = options.logger
this._started = false
}

codeHotspotsEnabled () {
return this._codeHotspotsEnabled
}

start ({ mapper } = {}) {
if (this._started) return

if (this._codeHotspotsEnabled && !this._emittedFFMessage && this._logger) {
this._logger.debug(
`Wall profiler: Enable config_trace_show_breakdown_profiling_for_node feature flag to see code hotspots.`)
this._emittedFFMessage = true
}

this._mapper = mapper
this._pprof = require('@datadog/pprof')
kSampleCount = this._pprof.time.constants.kSampleCount

// pprof otherwise crashes in worker threads
if (!process._startProfilerIdleNotifier) {
Expand All @@ -31,16 +119,63 @@ class NativeWallProfiler {
intervalMicros: this._samplingIntervalMicros,
durationMillis: this._flushIntervalMillis,
sourceMapper: this._mapper,
customLabels: this._codeHotspotsEnabled,
withContexts: this._codeHotspotsEnabled,
lineNumbers: false
})

if (this._codeHotspotsEnabled) {
this._profilerState = this._pprof.time.getState()
this._currentContext = {}
this._pprof.time.setContext(this._currentContext)
this._profilerState = undefined
this._lastSpan = undefined
this._lastStartedSpans = undefined
this._lastSampleCount = 0

beforeCh.subscribe(this._enter)
enterCh.subscribe(this._enter)
}

this._started = true
}

profile () {
_enter () {
if (!this._started) return
return this._pprof.time.stop(true)

const sampleCount = this._profilerState[kSampleCount]
if (sampleCount !== this._lastSampleCount) {
this._lastSampleCount = sampleCount
const context = this._currentContext
this._currentContext = {}
this._pprof.time.setContext(this._currentContext)

if (this._lastSpan) {
updateContext(context, this._lastSpan, this._lastStartedSpans, this._endpointCollectionEnabled)
}
}

const span = getActiveSpan()
if (span) {
this._lastSpan = span
this._lastStartedSpans = getStartedSpans(span.context())
} else {
this._lastStartedSpans = undefined
this._lastSpan = undefined
}
}

_stop (restart) {
if (!this._started) return
if (this._codeHotspotsEnabled) {
// update last sample context if needed
this._enter()
this._lastSampleCount = 0
}
return this._pprof.time.stop(restart, this._codeHotspotsEnabled ? generateLabels : undefined)
}

profile () {
return this._stop(true)
}

encode (profile) {
Expand All @@ -50,7 +185,13 @@ class NativeWallProfiler {
stop () {
if (!this._started) return

const profile = this._pprof.time.stop()
const profile = this._stop(false)
if (this._codeHotspotsEnabled) {
beforeCh.unsubscribe(this._enter)
enterCh.subscribe(this._enter)
this._profilerState = undefined
}

this._started = false
return profile
}
Expand Down
4 changes: 0 additions & 4 deletions packages/dd-trace/src/span_processor.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,6 @@ class SpanProcessor {
}
}

for (const span of trace.finished) {
span.context()._tags = {}
}

trace.started = active
trace.finished = []
}
Expand Down
25 changes: 17 additions & 8 deletions packages/dd-trace/test/profiling/config.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ describe('config', () => {
expect(config.logger).to.be.an.instanceof(ConsoleLogger)
expect(config.exporters[0]).to.be.an.instanceof(AgentExporter)
expect(config.profilers[0]).to.be.an.instanceof(WallProfiler)
expect(config.profilers[0].codeHotspotsEnabled()).false
expect(config.profilers[1]).to.be.an.instanceof(SpaceProfiler)
})

Expand All @@ -57,8 +58,9 @@ describe('config', () => {
error () { }
},
exporters: 'agent,file',
profilers: 'wall',
url: 'http://localhost:1234/'
profilers: 'space,wall',
url: 'http://localhost:1234/',
codeHotspotsEnabled: true
}

const config = new Config(options)
Expand All @@ -78,8 +80,10 @@ describe('config', () => {
expect(config.exporters[0]._url.toString()).to.equal(options.url)
expect(config.exporters[1]).to.be.an.instanceof(FileExporter)
expect(config.profilers).to.be.an('array')
expect(config.profilers.length).to.equal(1)
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
expect(config.profilers.length).to.equal(2)
expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler)
expect(config.profilers[1]).to.be.an.instanceOf(WallProfiler)
expect(config.profilers[1].codeHotspotsEnabled()).true
})

it('should filter out invalid profilers', () => {
Expand Down Expand Up @@ -127,7 +131,8 @@ describe('config', () => {

it('should support profiler config with DD_PROFILING_PROFILERS', () => {
process.env = {
DD_PROFILING_PROFILERS: 'wall'
DD_PROFILING_PROFILERS: 'wall',
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1'
}
const options = {
logger: {
Expand All @@ -143,6 +148,7 @@ describe('config', () => {
expect(config.profilers).to.be.an('array')
expect(config.profilers.length).to.equal(1)
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
expect(config.profilers[0].codeHotspotsEnabled()).true
})

it('should support profiler config with DD_PROFILING_XXX_ENABLED', () => {
Expand Down Expand Up @@ -190,7 +196,8 @@ describe('config', () => {

it('should prioritize options over env variables', () => {
process.env = {
DD_PROFILING_PROFILERS: 'wall'
DD_PROFILING_PROFILERS: 'space',
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED: '1'
}
const options = {
logger: {
Expand All @@ -199,14 +206,16 @@ describe('config', () => {
warn () {},
error () {}
},
profilers: ['space']
profilers: ['wall'],
codeHotspotsEnabled: false
}

const config = new Config(options)

expect(config.profilers).to.be.an('array')
expect(config.profilers.length).to.equal(1)
expect(config.profilers[0]).to.be.an.instanceOf(SpaceProfiler)
expect(config.profilers[0]).to.be.an.instanceOf(WallProfiler)
expect(config.profilers[0].codeHotspotsEnabled()).false
})

it('should support tags', () => {
Expand Down
Loading

0 comments on commit 404787c

Please sign in to comment.