diff --git a/.github/workflows/appsec.yml b/.github/workflows/appsec.yml index f41b18f9d53..66990a1147f 100644 --- a/.github/workflows/appsec.yml +++ b/.github/workflows/appsec.yml @@ -122,7 +122,7 @@ jobs: express: runs-on: ubuntu-latest env: - PLUGINS: express|body-parser|cookie-parser + PLUGINS: express|body-parser|cookie-parser|multer steps: - uses: actions/checkout@v4 - uses: ./.github/actions/node/setup diff --git a/.github/workflows/llmobs.yml b/.github/workflows/llmobs.yml new file mode 100644 index 00000000000..a1e3502a8a0 --- /dev/null +++ b/.github/workflows/llmobs.yml @@ -0,0 +1,49 @@ +name: LLMObs + +on: + pull_request: + push: + branches: [master] + schedule: + - cron: '0 4 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + sdk: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/testagent/start + - uses: ./.github/actions/node/setup + - uses: ./.github/actions/install + - uses: ./.github/actions/node/18 + - run: yarn test:llmobs:sdk:ci + - uses: ./.github/actions/node/20 + - run: yarn test:llmobs:sdk:ci + - uses: ./.github/actions/node/latest + - run: yarn test:llmobs:sdk:ci + - if: always() + uses: ./.github/actions/testagent/logs + - uses: codecov/codecov-action@v3 + + openai: + runs-on: ubuntu-latest + env: + PLUGINS: openai + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/testagent/start + - uses: ./.github/actions/node/setup + - uses: ./.github/actions/install + - uses: ./.github/actions/node/oldest + - run: yarn test:llmobs:plugins:ci + shell: bash + - uses: ./.github/actions/node/latest + - run: yarn test:llmobs:plugins:ci + shell: bash + - uses: codecov/codecov-action@v3 + - if: always() + uses: ./.github/actions/testagent/logs diff --git a/.github/workflows/plugins.yml b/.github/workflows/plugins.yml index dfc032a6118..0e067a98fb5 100644 --- a/.github/workflows/plugins.yml +++ b/.github/workflows/plugins.yml 
@@ -221,6 +221,14 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/plugins/test + body-parser: + runs-on: ubuntu-latest + env: + PLUGINS: body-parser + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/plugins/test + bunyan: runs-on: ubuntu-latest env: @@ -259,6 +267,14 @@ jobs: - run: yarn test:plugins:ci - uses: codecov/codecov-action@v2 + cookie-parser: + runs-on: ubuntu-latest + env: + PLUGINS: cookie-parser + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/plugins/test + couchbase: strategy: matrix: @@ -366,7 +382,22 @@ jobs: express: runs-on: ubuntu-latest env: - PLUGINS: express|body-parser|cookie-parser + PLUGINS: express + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/plugins/test + + express-mongo-sanitize: + runs-on: ubuntu-latest + services: + mongodb: + image: circleci/mongo + ports: + - 27017:27017 + env: + PLUGINS: express-mongo-sanitize + PACKAGE_NAMES: express-mongo-sanitize + SERVICES: mongo steps: - uses: actions/checkout@v4 - uses: ./.github/actions/plugins/test @@ -548,6 +579,23 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/plugins/test + mariadb: + runs-on: ubuntu-latest + services: + mysql: + image: mariadb:10.4 + env: + MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' + MYSQL_DATABASE: 'db' + ports: + - 3306:3306 + env: + PLUGINS: mariadb + SERVICES: mariadb + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/plugins/test + memcached: runs-on: ubuntu-latest services: @@ -641,12 +689,29 @@ jobs: ports: - 3306:3306 env: - PLUGINS: mysql|mysql2|mariadb # TODO: move mysql2 to its own job + PLUGINS: mysql SERVICES: mysql steps: - uses: actions/checkout@v4 - uses: ./.github/actions/plugins/test + mysql2: + runs-on: ubuntu-latest + services: + mysql: + image: mariadb:10.4 + env: + MYSQL_ALLOW_EMPTY_PASSWORD: 'yes' + MYSQL_DATABASE: 'db' + ports: + - 3306:3306 + env: + PLUGINS: mysql2 + SERVICES: mysql2 + steps: + - uses: actions/checkout@v4 + - uses: 
./.github/actions/plugins/test + net: runs-on: ubuntu-latest env: @@ -934,6 +999,14 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/plugins/test + url: + runs-on: ubuntu-latest + env: + PLUGINS: url + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/plugins/test + when: runs-on: ubuntu-latest env: diff --git a/.github/workflows/project.yml b/.github/workflows/project.yml index 588e148fdeb..38c43297947 100644 --- a/.github/workflows/project.yml +++ b/.github/workflows/project.yml @@ -44,6 +44,20 @@ jobs: - uses: ./.github/actions/install - run: node node_modules/.bin/mocha --colors --timeout 30000 integration-tests/init.spec.js + integration-guardrails-unsupported: + strategy: + matrix: + version: ['0.8', '0.10', '0.12', '4', '6', '8', '10'] + runs-on: ubuntu-latest + env: + DD_INJECTION_ENABLED: 'true' + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.version }} + - run: node ./init + integration-ci: strategy: matrix: diff --git a/.gitignore b/.gitignore index a8dcafe063b..773f16d5a90 100644 --- a/.gitignore +++ b/.gitignore @@ -106,6 +106,7 @@ typings/ # End of https://www.gitignore.io/api/node,macos,visualstudiocode +.github/notes .next package-lock.json out diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 714eb493581..6da75a763ac 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,8 +19,16 @@ onboarding_tests_installer: parallel: matrix: - ONBOARDING_FILTER_WEBLOG: [test-app-nodejs,test-app-nodejs-container] - SCENARIO: [ INSTALLER_AUTO_INJECTION, SIMPLE_AUTO_INJECTION_PROFILING ] + SCENARIO: [ SIMPLE_INSTALLER_AUTO_INJECTION, SIMPLE_AUTO_INJECTION_PROFILING ] onboarding_tests_k8s_injection: variables: WEBLOG_VARIANT: sample-app + +requirements_json_test: + rules: + - when: on_success + variables: + REQUIREMENTS_BLOCK_JSON_PATH: ".gitlab/requirements_block.json" + REQUIREMENTS_ALLOW_JSON_PATH: ".gitlab/requirements_allow.json" + diff --git 
a/.gitlab/prepare-oci-package.sh b/.gitlab/prepare-oci-package.sh index b65b3e73d5c..af579f04355 100755 --- a/.gitlab/prepare-oci-package.sh +++ b/.gitlab/prepare-oci-package.sh @@ -21,3 +21,5 @@ fi echo -n $JS_PACKAGE_VERSION > packaging/sources/version cd packaging + +cp ../requirements.json sources/requirements.json diff --git a/.gitlab/requirements_allow.json b/.gitlab/requirements_allow.json new file mode 100644 index 00000000000..e832f6e7132 --- /dev/null +++ b/.gitlab/requirements_allow.json @@ -0,0 +1,19 @@ +[ + {"name": "min glibc x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.17"}}, + {"name": "ok glibc x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.23"}}, + {"name": "high glibc x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:3.0"}}, + {"name": "musl x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "musl:1.2.2"}}, + {"name": "min glibc arm64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm64", "libc": "glibc:2.17"}}, + {"name": "ok glibc arm64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm64", "libc": "glibc:2.27"}}, + {"name": "glibc x86","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x86", "libc": "glibc:2.19"}}, + {"name": "musl arm","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm", "libc": "musl:1.2.2"}}, + {"name": "musl arm64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm64", "libc": "musl:1.2.2"}}, + {"name": "musl x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "musl:1.2.2"}}, + {"name": "musl x86", "filepath": "/some/path", "args": [], 
"envars": [], "host": {"os": "linux", "arch": "x86", "libc": "musl:1.2.2"}}, + {"name": "windows x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "windows", "arch": "x64"}}, + {"name": "windows x86", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "windows", "arch": "x86"}}, + {"name": "macos x64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "darwin", "arch": "x64"}}, + {"name": "macos arm64", "filepath": "/some/path", "args": [], "envars": [], "host": {"os": "darwin", "arch": "arm64"}}, + {"name": "node app", "filepath": "/pathto/node", "args": ["/pathto/node", "./app.js"], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.40"}}, + {"name": "ts-node app", "filepath": "/pathto/ts-node", "args": ["/pathto/ts-node", "./app.js"], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.40"}} +] diff --git a/.gitlab/requirements_block.json b/.gitlab/requirements_block.json new file mode 100644 index 00000000000..e728f802915 --- /dev/null +++ b/.gitlab/requirements_block.json @@ -0,0 +1,11 @@ +[ + {"name": "unsupported 2.x glibc x64","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.16"}}, + {"name": "unsupported 1.x glibc x64","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:1.22"}}, + {"name": "unsupported 2.x.x glibc x64","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.16.9"}}, + {"name": "unsupported 2.x glibc arm64","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm64", "libc": "glibc:2.16"}}, + {"name": "unsupported 2.x.x glibc x64","filepath": "/some/path", "args": [], "envars": [], "host": {"os": "linux", "arch": "arm64", "libc": "glibc:2.16.9"}}, + {"name": "unsupported 2.x.x glibc x86","filepath": "/some/path", "args": [], "envars": [], "host": {"os": 
"linux", "arch": "x86", "libc": "glibc:2.17"}}, + {"name": "npm","filepath": "/pathto/node", "args": ["/pathto/node", "/pathto/npm-cli.js"], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.40"}}, + {"name": "yarn","filepath": "/pathto/node", "args": ["/pathto/node", "/pathto/yarn.js"], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.40"}}, + {"name": "pnpm","filepath": "/pathto/node", "args": ["/pathto/node", "/pathto/pnpm.cjs"], "envars": [], "host": {"os": "linux", "arch": "x64", "libc": "glibc:2.40"}} +] diff --git a/CODEOWNERS b/CODEOWNERS index da66c3557b0..3b45215923f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -53,6 +53,11 @@ /packages/dd-trace/src/service-naming/ @Datadog/apm-idm-js /packages/dd-trace/test/service-naming/ @Datadog/apm-idm-js +/packages/dd-trace/src/llmobs/ @DataDog/ml-observability +/packages/dd-trace/test/llmobs/ @DataDog/ml-observability +/packages/datadog-plugin-openai/ @DataDog/ml-observability +/packages/datadog-instrumentations/src/openai.js @DataDog/ml-observability + # CI /.github/workflows/appsec.yml @DataDog/asm-js /.github/workflows/ci-visibility-performance.yml @DataDog/ci-app-libraries diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ea960983105..30410bc3b5a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -72,12 +72,18 @@ Eventually we plan to look into putting these permission-required tests behind a ## Development Requirements -Since this project supports multiple Node versions, using a version -manager such as [nvm](https://github.com/creationix/nvm) is recommended. +Since this project supports multiple Node.js versions, using a version manager +such as [nvm](https://github.com/creationix/nvm) is recommended. If you're +unsure which version of Node.js to use, just use the latest version, which +should always work. -We use [yarn](https://yarnpkg.com/) for its workspace functionality, so make sure to install that as well. 
+We use [yarn](https://yarnpkg.com/) 1.x for its workspace functionality, so make sure to install that as well. The easiest way to install yarn 1.x is with npm: -To install dependencies once you have Node and yarn installed, run: +```sh +$ npm install -g yarn +``` + +To install dependencies once you have Node and yarn installed, run this in the project directory: ```sh $ yarn @@ -91,23 +97,42 @@ $ yarn The `pg-native` package requires `pg_config` to be in your `$PATH` to be able to install. Please refer to [the "Install" section](https://github.com/brianc/node-postgres/tree/master/packages/pg-native#install) of the `pg-native` documentation for how to ensure your environment is configured correctly. -### Setup - -Before running _plugin_ tests, the data stores need to be running. -The easiest way to start all of them is to use the provided -docker-compose configuration: +### Plugin Tests -```sh -$ docker-compose up -d -V --remove-orphans --force-recreate -$ yarn services -``` +Before running _plugin_ tests, the supporting docker containers need to be running. You _can_ attempt to start all of them using docker-compose, but that's a drain on your system, and not all the images will even run at all on AMD64 devices. > **Note** > The `aerospike`, `couchbase`, `grpc` and `oracledb` instrumentations rely on > native modules that do not compile on ARM64 devices (for example M1/M2 Mac) > - their tests cannot be run locally on these devices. -### Unit Tests +Instead, you can follow this procedure for the plugin you want to run tests for: + +1. Check the CI config in `.github/workflows/plugins.yml` to see what the appropriate values for the `SERVICES` and `PLUGINS` environment variables are for the plugin you're trying to test (noting that not all plugins require `SERVICES`). For example, for the `amqplib` plugin, the `SERVICES` value is `rabbitmq`, and the `PLUGINS` value is `amqplib`. +2. Run the appropriate docker-compose command to start the required services. 
For example, for the `amqplib` plugin, you would run: `docker compose up -d rabbitmq`. +3. Run `yarn services`, with the environment variables set above. This will install any versions of the library to be tested against into the `versions` directory, and check that the appropriate services are running prior to running the test. +4. Now, you can run `yarn test:plugins` with the environment variables set above to run the tests for the plugin you're interested in. + +To wrap that all up into a simple few lines of shell commands, here is all of the above, for the `amqplib` plugin: + +```sh +# These are exported for simplicity, but you can also just set them inline. +export SERVICES="rabbitmq" # retrieved from .github/workflows/plugins.yml +export PLUGINS="amqplib" # retrieved from .github/workflows/plugins.yml + +docker compose up -d $SERVICES +yarn services + +yarn test:plugins # This one actually runs the tests. Can be run many times. +``` + +You can also run the tests for multiple plugins at once by separating them with a pipe (`|`) delimiter. For example, to run the tests for the `amqplib` and `bluebird` plugins: + +```sh +PLUGINS="amqplib|bluebird" yarn test:plugins +``` + +### Other Unit Tests There are several types of unit tests, for various types of components. The following commands may be useful: @@ -124,17 +149,6 @@ $ yarn test:instrumentations Several other components have test commands as well. See `package.json` for details. -To test _plugins_ (i.e. components in `packages/datadog-plugin-XXXX` -directories, set the `PLUGINS` environment variable to the plugin you're -interested in, and use `yarn test:plugins`. If you need to test multiple -plugins you may separate then with a pipe (`|`) delimiter. 
Here's an -example testing the `express` and `bluebird` plugins: - -```sh -PLUGINS="express|bluebird" yarn test:plugins -``` - - ### Linting We use [ESLint](https://eslint.org) to make sure that new code @@ -146,6 +160,9 @@ To run the linter, use: $ yarn lint ``` +This also checks that the `LICENSE-3rdparty.csv` file is up-to-date, and checks +dependencies for vulnerabilities. + ### Benchmarks diff --git a/docker-compose.yml b/docker-compose.yml index a16fef8893d..81bdd3c2032 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -129,7 +129,7 @@ services: - KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093 - KAFKA_CONTROLLER_QUORUM_VOTERS=1@127.0.0.1:9093 - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER - - KAFKA_CLUSTER_ID=r4zt_wrqTRuT7W2NJsB_GA + - CLUSTER_ID=5L6g3nShT-eMCtK--X86sw - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://127.0.0.1:9092 - KAFKA_INTER_BROKER_LISTENER_NAME=PLAINTEXT - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT diff --git a/docs/package.json b/docs/package.json index 30cb5dd848a..0ec46d7584a 100644 --- a/docs/package.json +++ b/docs/package.json @@ -4,7 +4,7 @@ "main": "typedoc.js", "scripts": { "build": "typedoc ../index.d.ts && ./add-redirects.sh", - "pretest": "tsc -p . && tsc test", + "pretest": "tsc -p . && tsc --types node test", "test": "node test" }, "license": "BSD-3-Clause", diff --git a/docs/test.ts b/docs/test.ts index 9c6c7df6211..37342718c2a 100644 --- a/docs/test.ts +++ b/docs/test.ts @@ -324,6 +324,9 @@ tracer.use('http', { tracer.use('http', { client: httpClientOptions }); +tracer.use('http', { + enablePropagationWithAmazonHeaders: true +}); tracer.use('http2'); tracer.use('http2', { server: http2ServerOptions @@ -536,3 +539,80 @@ const otelTraceId: string = spanContext.traceId const otelSpanId: string = spanContext.spanId const otelTraceFlags: number = spanContext.traceFlags const otelTraceState: opentelemetry.TraceState = spanContext.traceState! 
+ +// -- LLM Observability -- +const llmobsEnableOptions = { + mlApp: 'mlApp', + agentlessEnabled: true +} +tracer.init({ + llmobs: llmobsEnableOptions, +}) +const llmobs = tracer.llmobs +const enabled = llmobs.enabled + +// manually enable +llmobs.enable({ + mlApp: 'mlApp', + agentlessEnabled: true +}) + +// manually disable +llmobs.disable() + +// trace block of code +llmobs.trace({ name: 'name', kind: 'llm' }, () => {}) +llmobs.trace({ kind: 'llm', name: 'myLLM', modelName: 'myModel', modelProvider: 'myProvider' }, () => {}) +llmobs.trace({ name: 'name', kind: 'llm' }, (span, cb) => { + llmobs.annotate(span, {}) + span.setTag('foo', 'bar') + cb(new Error('boom')) +}) + +// wrap a function +llmobs.wrap({ kind: 'llm' }, function myLLM () {})() +llmobs.wrap({ kind: 'llm', name: 'myLLM', modelName: 'myModel', modelProvider: 'myProvider' }, function myFunction () {})() + +// export a span +llmobs.enable({ mlApp: 'myApp' }) +llmobs.trace({ kind: 'llm', name: 'myLLM' }, (span) => { + const llmobsSpanCtx = llmobs.exportSpan(span) + llmobsSpanCtx.traceId; + llmobsSpanCtx.spanId; + + // submit evaluation + llmobs.disable() + llmobs.submitEvaluation(llmobsSpanCtx, { + label: 'my-eval-metric', + metricType: 'categorical', + value: 'good', + mlApp: 'myApp', + tags: {}, + timestampMs: Date.now() + }) +}) + +// annotate a span +llmobs.annotate({ + inputData: 'input', + outputData: 'output', + metadata: {}, + metrics: { + inputTokens: 10, + outputTokens: 5, + totalTokens: 15 + }, + tags: {} +}) +llmobs.annotate(span, { + inputData: 'input', + outputData: 'output', + metadata: {}, + metrics: {}, + tags: {} +}) + + + +// flush +llmobs.flush() diff --git a/docs/tsconfig.json b/docs/tsconfig.json index 8bb0763d679..263508a814d 100644 --- a/docs/tsconfig.json +++ b/docs/tsconfig.json @@ -4,7 +4,8 @@ "moduleResolution": "node", "module": "commonjs", "baseUrl": ".", - "strict": true + "strict": true, + "types": ["node"] }, "files": [ "../index.d.ts" diff --git a/index.d.ts 
b/index.d.ts index 31cf89665b0..11c5b54cc3b 100644 --- a/index.d.ts +++ b/index.d.ts @@ -137,6 +137,11 @@ interface Tracer extends opentracing.Tracer { TracerProvider: tracer.opentelemetry.TracerProvider; dogstatsd: tracer.DogStatsD; + + /** + * LLM Observability SDK + */ + llmobs: tracer.llmobs.LLMObs; } // left out of the namespace, so it @@ -753,6 +758,11 @@ declare namespace tracer { */ maxDepth?: number } + + /** + * Configuration enabling LLM Observability. Enablement is superseded by the DD_LLMOBS_ENABLED environment variable. + */ + llmobs?: llmobs.LLMObsEnableOptions } /** @@ -1034,6 +1044,14 @@ declare namespace tracer { * @default code => code < 500 */ validateStatus?: (code: number) => boolean; + + /** + * Enable injection of tracing headers into requests signed with AWS IAM headers. + * Disable this if you get AWS signature errors (HTTP 403). + * + * @default false + */ + enablePropagationWithAmazonHeaders?: boolean; } /** @hidden */ @@ -2205,6 +2223,331 @@ declare namespace tracer { */ telemetryVerbosity?: string } + + export namespace llmobs { + export interface LLMObs { + + /** + * Whether or not LLM Observability is enabled. + */ + enabled: boolean, + + /** + * Enable LLM Observability tracing. + */ + enable (options: LLMObsEnableOptions): void, + + /** + * Disable LLM Observability tracing. + */ + disable (): void, + + /** + * Instruments a function by automatically creating a span activated on its + * scope. + * + * The span will automatically be finished when one of these conditions is + * met: + * + * * The function returns a promise, in which case the span will finish when + * the promise is resolved or rejected. + * * The function takes a callback as its second parameter, in which case the + * span will finish when that callback is called. + * * The function doesn't accept a callback and doesn't return a promise, in + * which case the span will finish at the end of the function execution. + * @param fn The function to instrument. 
+ * @param options Optional LLM Observability span options. + * @returns The return value of the function. + */ + trace<T> (options: LLMObsNamedSpanOptions, fn: (span: tracer.Span, done: (error?: Error) => void) => T): T + + /** + * Wrap a function to automatically create a span activated on its + * scope when it's called. + * + * The span will automatically be finished when one of these conditions is + * met: + * + * * The function returns a promise, in which case the span will finish when + * the promise is resolved or rejected. + * * The function takes a callback as its last parameter, in which case the + * span will finish when that callback is called. + * * The function doesn't accept a callback and doesn't return a promise, in + * which case the span will finish at the end of the function execution. + * @param fn The function to instrument. + * @param options Optional LLM Observability span options. + * @returns A new function that wraps the provided function with span creation. + */ + wrap<T = (...args: any[]) => any> (options: LLMObsNamelessSpanOptions, fn: T): T + + /** + * Decorate a function in a javascript runtime that supports function decorators. + * Note that this is **not** supported in the Node.js runtime, but is in TypeScript. + * + * In TypeScript, this decorator is only supported in contexts where general TypeScript + * function decorators are supported. + * + * @param options Optional LLM Observability span options. + */ + decorate (options: llmobs.LLMObsNamelessSpanOptions): any + + /** + * Returns a representation of a span to export its span and trace IDs. + * If no span is provided, the current LLMObs-type span will be used. + * @param span Optional span to export. + * @returns An object containing the span and trace IDs. + */ + exportSpan (span?: tracer.Span): llmobs.ExportedLLMObsSpan + + + /** + * Sets inputs, outputs, tags, metadata, and metrics as provided for a given LLM Observability span. 
+ * Note that with the exception of tags, this method will override any existing values for the provided fields. + * + * For example: + * ```javascript + * llmobs.trace({ kind: 'llm', name: 'myLLM', modelName: 'gpt-4o', modelProvider: 'openai' }, () => { + * llmobs.annotate({ + * inputData: [{ content: 'system prompt', role: 'system' }, { content: 'user prompt', role: 'user' }], + * outputData: { content: 'response', role: 'ai' }, + * metadata: { temperature: 0.7 }, + * tags: { host: 'localhost' }, + * metrics: { inputTokens: 10, outputTokens: 20, totalTokens: 30 } + * }) + * }) + * ``` + * + * @param span The span to annotate (defaults to the current LLM Observability span if not provided) + * @param options An object containing the inputs, outputs, tags, metadata, and metrics to set on the span. + */ + annotate (options: llmobs.AnnotationOptions): void + annotate (span: tracer.Span | undefined, options: llmobs.AnnotationOptions): void + + /** + * Submits a custom evaluation metric for a given span ID and trace ID. + * @param spanContext The span context of the span to submit the evaluation metric for. + * @param options An object containing the label, metric type, value, and tags of the evaluation metric. + */ + submitEvaluation (spanContext: llmobs.ExportedLLMObsSpan, options: llmobs.EvaluationOptions): void + + /** + * Flushes any remaining spans and evaluation metrics to LLM Observability. + */ + flush (): void + } + + interface EvaluationOptions { + /** + * The name of the evaluation metric + */ + label: string, + + /** + * The type of evaluation metric, one of 'categorical' or 'score' + */ + metricType: 'categorical' | 'score', + + /** + * The value of the evaluation metric. + * Must be string for 'categorical' metrics and number for 'score' metrics. + */ + value: string | number, + + /** + * An object of string key-value pairs to tag the evaluation metric with. 
+ */ + tags?: { [key: string]: any }, + + /** + * The name of the ML application + */ + mlApp?: string, + + /** + * The timestamp in milliseconds when the evaluation metric result was generated. + */ + timestampMs?: number + } + + interface Document { + /** + * Document text + */ + text?: string, + + /** + * Document name + */ + name?: string, + + /** + * Document ID + */ + id?: string, + + /** + * Score of the document retrieval as a source of ground truth + */ + score?: number + } + + /** + * Represents a single LLM chat model message + */ + interface Message { + /** + * Content of the message. + */ + content: string, + + /** + * Role of the message (ie system, user, ai) + */ + role?: string, + + /** + * Tool calls of the message + */ + toolCalls?: ToolCall[], + } + + /** + * Represents a single tool call for an LLM chat model message + */ + interface ToolCall { + /** + * Name of the tool + */ + name?: string, + + /** + * Arguments passed to the tool + */ + arguments?: { [key: string]: any }, + + /** + * The tool ID + */ + toolId?: string, + + /** + * The tool type + */ + type?: string + } + + /** + * Annotation options for LLM Observability spans. + */ + interface AnnotationOptions { + /** + * A single input string, object, or a list of objects based on the span kind: + * 1. LLM spans: accepts a string, or an object of the form {content: "...", role: "..."}, or a list of objects with the same signature. + * 2. Embedding spans: accepts a string, list of strings, or an object of the form {text: "...", ...}, or a list of objects with the same signature. + * 3. Other: any JSON serializable type + */ + inputData?: string | Message | Message[] | Document | Document[] | { [key: string]: any }, + + /** + * A single output string, object, or a list of objects based on the span kind: + * 1. LLM spans: accepts a string, or an object of the form {content: "...", role: "..."}, or a list of objects with the same signature. + * 2. 
Retrieval spans: An object containing any of the key value pairs {name: str, id: str, text: str, score: number} or a list of dictionaries with the same signature. + * 3. Other: any JSON serializable type + */ + outputData?: string | Message | Message[] | Document | Document[] | { [key: string]: any }, + + /** + * Object of JSON serializable key-value metadata pairs relevant to the input/output operation described by the LLM Observability span. + */ + metadata?: { [key: string]: any }, + + /** + * Object of JSON serializable key-value metrics (number) pairs, such as `{input,output,total}Tokens` + */ + metrics?: { [key: string]: number }, + + /** + * Object of JSON serializable key-value tag pairs to set or update on the LLM Observability span regarding the span's context. + */ + tags?: { [key: string]: any } + } + + /** + * An object containing the span ID and trace ID of interest + */ + interface ExportedLLMObsSpan { + /** + * Trace ID associated with the span of interest + */ + traceId: string, + + /** + * Span ID associated with the span of interest + */ + spanId: string, + } + + interface LLMObsSpanOptions extends SpanOptions { + /** + * LLM Observability span kind. One of `agent`, `workflow`, `task`, `tool`, `retrieval`, `embedding`, or `llm`. + */ + kind: llmobs.spanKind, + + /** + * The ID of the underlying user session. Required for tracking sessions. + */ + sessionId?: string, + + /** + * The name of the ML application that the agent is orchestrating. + * If not provided, the default value will be set to mlApp provided during initialization, or `DD_LLMOBS_ML_APP`. + */ + mlApp?: string, + + /** + * The name of the invoked LLM or embedding model. Only used on `llm` and `embedding` spans. + */ + modelName?: string, + + /** + * The name of the invoked LLM or embedding model provider. Only used on `llm` and `embedding` spans. + * If not provided for LLM or embedding spans, a default value of 'custom' will be set. 
+ */ + modelProvider?: string, + } + + interface LLMObsNamedSpanOptions extends LLMObsSpanOptions { + /** + * The name of the traced operation. This is a required option. + */ + name: string, + } + + interface LLMObsNamelessSpanOptions extends LLMObsSpanOptions { + /** + * The name of the traced operation. + */ + name?: string, + } + + /** + * Options for enabling LLM Observability tracing. + */ + interface LLMObsEnableOptions { + /** + * The name of your ML application. + */ + mlApp?: string, + + /** + * Set to `true` to disable sending data that requires a Datadog Agent. + */ + agentlessEnabled?: boolean, + } + + /** @hidden */ + type spanKind = 'agent' | 'workflow' | 'task' | 'tool' | 'retrieval' | 'embedding' | 'llm' + } } /** diff --git a/init.js b/init.js index ecdb37daee8..8b183fc17ab 100644 --- a/init.js +++ b/init.js @@ -1,58 +1,71 @@ 'use strict' -const path = require('path') -const Module = require('module') -const semver = require('semver') -const log = require('./packages/dd-trace/src/log') -const { isTrue } = require('./packages/dd-trace/src/util') -const telemetry = require('./packages/dd-trace/src/telemetry/init-telemetry') +/* eslint-disable no-var */ -let initBailout = false -let clobberBailout = false -const forced = isTrue(process.env.DD_INJECT_FORCE) +var NODE_MAJOR = require('./version').NODE_MAJOR -if (process.env.DD_INJECTION_ENABLED) { - // If we're running via single-step install, and we're not in the app's - // node_modules, then we should not initialize the tracer. This prevents - // single-step-installed tracer from clobbering the manually-installed tracer. - let resolvedInApp - const entrypoint = process.argv[1] - try { - resolvedInApp = Module.createRequire(entrypoint).resolve('dd-trace') - } catch (e) { - // Ignore. If we can't resolve the module, we assume it's not in the app. 
- } - if (resolvedInApp) { - const ourselves = path.join(__dirname, 'index.js') - if (ourselves !== resolvedInApp) { - clobberBailout = true +// We use several things that are not supported by older versions of Node: +// - AsyncLocalStorage +// - The `semver` module +// - dc-polyfill +// - Mocha (for testing) +// and probably others. +// TODO: Remove all these dependencies so that we can report telemetry. +if (NODE_MAJOR >= 12) { + var path = require('path') + var Module = require('module') + var semver = require('semver') + var log = require('./packages/dd-trace/src/log') + var isTrue = require('./packages/dd-trace/src/util').isTrue + var telemetry = require('./packages/dd-trace/src/telemetry/init-telemetry') + + var initBailout = false + var clobberBailout = false + var forced = isTrue(process.env.DD_INJECT_FORCE) + + if (process.env.DD_INJECTION_ENABLED) { + // If we're running via single-step install, and we're not in the app's + // node_modules, then we should not initialize the tracer. This prevents + // single-step-installed tracer from clobbering the manually-installed tracer. + var resolvedInApp + var entrypoint = process.argv[1] + try { + resolvedInApp = Module.createRequire(entrypoint).resolve('dd-trace') + } catch (e) { + // Ignore. If we can't resolve the module, we assume it's not in the app. + } + if (resolvedInApp) { + var ourselves = path.join(__dirname, 'index.js') + if (ourselves !== resolvedInApp) { + clobberBailout = true + } } - } - // If we're running via single-step install, and the runtime doesn't match - // the engines field in package.json, then we should not initialize the tracer. 
- if (!clobberBailout) { - const { engines } = require('./package.json') - const version = process.versions.node - if (!semver.satisfies(version, engines.node)) { - initBailout = true - telemetry([ - { name: 'abort', tags: ['reason:incompatible_runtime'] }, - { name: 'abort.runtime', tags: [] } - ]) - log.info('Aborting application instrumentation due to incompatible_runtime.') - log.info(`Found incompatible runtime nodejs ${version}, Supported runtimes: nodejs ${engines.node}.`) - if (forced) { - log.info('DD_INJECT_FORCE enabled, allowing unsupported runtimes and continuing.') + // If we're running via single-step install, and the runtime doesn't match + // the engines field in package.json, then we should not initialize the tracer. + if (!clobberBailout) { + var engines = require('./package.json').engines + var version = process.versions.node + if (!semver.satisfies(version, engines.node)) { + initBailout = true + telemetry([ + { name: 'abort', tags: ['reason:incompatible_runtime'] }, + { name: 'abort.runtime', tags: [] } + ]) + log.info('Aborting application instrumentation due to incompatible_runtime.') + log.info('Found incompatible runtime nodejs ' + version + ', Supported runtimes: nodejs ' + engines.node + '.') + if (forced) { + log.info('DD_INJECT_FORCE enabled, allowing unsupported runtimes and continuing.') + } } } } -} -if (!clobberBailout && (!initBailout || forced)) { - const tracer = require('.') - tracer.init() - module.exports = tracer - telemetry('complete', [`injection_forced:${forced && initBailout ? 'true' : 'false'}`]) - log.info('Application instrumentation bootstrapping complete') + if (!clobberBailout && (!initBailout || forced)) { + var tracer = require('.') + tracer.init() + module.exports = tracer + telemetry('complete', ['injection_forced:' + (forced && initBailout ? 
'true' : 'false')]) + log.info('Application instrumentation bootstrapping complete') + } } diff --git a/integration-tests/appsec/multer.spec.js b/integration-tests/appsec/multer.spec.js new file mode 100644 index 00000000000..b87d7d268b0 --- /dev/null +++ b/integration-tests/appsec/multer.spec.js @@ -0,0 +1,142 @@ +'use strict' + +const { assert } = require('chai') +const path = require('path') +const axios = require('axios') + +const { + createSandbox, + FakeAgent, + spawnProc +} = require('../helpers') + +const { NODE_MAJOR } = require('../../version') + +const describe = NODE_MAJOR <= 16 ? globalThis.describe.skip : globalThis.describe + +describe('multer', () => { + let sandbox, cwd, startupTestFile, agent, proc, env + + ['1.4.4-lts.1', '1.4.5-lts.1'].forEach((version) => { + describe(`v${version}`, () => { + before(async () => { + sandbox = await createSandbox(['express', `multer@${version}`]) + cwd = sandbox.folder + startupTestFile = path.join(cwd, 'appsec', 'multer', 'index.js') + }) + + after(async () => { + await sandbox.remove() + }) + + beforeEach(async () => { + agent = await new FakeAgent().start() + + env = { + AGENT_PORT: agent.port, + DD_APPSEC_RULES: path.join(cwd, 'appsec', 'multer', 'body-parser-rules.json') + } + + const execArgv = [] + + proc = await spawnProc(startupTestFile, { cwd, env, execArgv }) + }) + + afterEach(async () => { + proc.kill() + await agent.stop() + }) + + describe('Suspicious request blocking', () => { + describe('using middleware', () => { + it('should not block the request without an attack', async () => { + const form = new FormData() + form.append('key', 'value') + + const res = await axios.post(proc.url, form) + + assert.equal(res.data, 'DONE') + }) + + it('should block the request when attack is detected', async () => { + try { + const form = new FormData() + form.append('key', 'testattack') + + await axios.post(proc.url, form) + + return Promise.reject(new Error('Request should not return 200')) + } catch (e) { + 
assert.equal(e.response.status, 403) + } + }) + }) + + describe('not using middleware', () => { + it('should not block the request without an attack', async () => { + const form = new FormData() + form.append('key', 'value') + + const res = await axios.post(`${proc.url}/no-middleware`, form) + + assert.equal(res.data, 'DONE') + }) + + it('should block the request when attack is detected', async () => { + try { + const form = new FormData() + form.append('key', 'testattack') + + await axios.post(`${proc.url}/no-middleware`, form) + + return Promise.reject(new Error('Request should not return 200')) + } catch (e) { + assert.equal(e.response.status, 403) + } + }) + }) + }) + + describe('IAST', () => { + function assertCmdInjection ({ payload }) { + assert.isArray(payload) + assert.strictEqual(payload.length, 1) + assert.isArray(payload[0]) + + const { meta } = payload[0][0] + + assert.property(meta, '_dd.iast.json') + + const iastJson = JSON.parse(meta['_dd.iast.json']) + + assert.isTrue(iastJson.vulnerabilities.some(v => v.type === 'COMMAND_INJECTION')) + assert.isTrue(iastJson.sources.some(s => s.origin === 'http.request.body')) + } + + describe('using middleware', () => { + it('should taint multipart body', async () => { + const resultPromise = agent.assertMessageReceived(assertCmdInjection) + + const formData = new FormData() + formData.append('command', 'echo 1') + await axios.post(`${proc.url}/cmd`, formData) + + return resultPromise + }) + }) + + describe('not using middleware', () => { + it('should taint multipart body', async () => { + const resultPromise = agent.assertMessageReceived(assertCmdInjection) + + const formData = new FormData() + formData.append('command', 'echo 1') + await axios.post(`${proc.url}/cmd-no-middleware`, formData) + + return resultPromise + }) + }) + }) + }) + }) +}) diff --git a/integration-tests/appsec/multer/body-parser-rules.json b/integration-tests/appsec/multer/body-parser-rules.json new file mode 100644 index 
00000000000..6b22c7cbbf6 --- /dev/null +++ b/integration-tests/appsec/multer/body-parser-rules.json @@ -0,0 +1,33 @@ +{ + "version": "2.2", + "metadata": { + "rules_version": "1.5.0" + }, + "rules": [ + { + "id": "test-rule-id-1", + "name": "test-rule-name-1", + "tags": { + "type": "security_scanner", + "category": "attack_attempt" + }, + "conditions": [ + { + "parameters": { + "inputs": [ + { + "address": "server.request.body" + } + ], + "list": [ + "testattack" + ] + }, + "operator": "phrase_match" + } + ], + "transformers": ["lowercase"], + "on_match": ["block"] + } + ] +} diff --git a/integration-tests/appsec/multer/index.js b/integration-tests/appsec/multer/index.js new file mode 100644 index 00000000000..b872af9dc8e --- /dev/null +++ b/integration-tests/appsec/multer/index.js @@ -0,0 +1,64 @@ +'use strict' + +const options = { + appsec: { + enabled: true + }, + iast: { + enabled: true, + requestSampling: 100 + } +} + +if (process.env.AGENT_PORT) { + options.port = process.env.AGENT_PORT +} + +if (process.env.AGENT_URL) { + options.url = process.env.AGENT_URL +} + +const tracer = require('dd-trace') +tracer.init(options) + +const http = require('http') +const express = require('express') +const childProcess = require('child_process') + +const multer = require('multer') +const uploadToMemory = multer({ storage: multer.memoryStorage(), limits: { fileSize: 200000 } }) + +const app = express() + +app.post('/', uploadToMemory.single('file'), (req, res) => { + res.end('DONE') +}) + +app.post('/no-middleware', (req, res) => { + uploadToMemory.none()(req, res, () => { + res.end('DONE') + }) +}) + +app.post('/cmd', uploadToMemory.single('file'), (req, res) => { + childProcess.exec(req.body.command, () => { + res.end('DONE') + }) +}) + +app.post('/cmd-no-middleware', (req, res) => { + uploadToMemory.none()(req, res, () => { + childProcess.exec(req.body.command, () => { + res.end('DONE') + }) + }) +}) + +app.get('/', (req, res) => { + res.status(200).send('hello world') 
+}) + +const server = http.createServer(app).listen(0, () => { + const port = server.address().port + process.send?.({ port }) +}) diff --git a/integration-tests/debugger/basic.spec.js b/integration-tests/debugger/basic.spec.js new file mode 100644 index 00000000000..3330a6c32d3 --- /dev/null +++ b/integration-tests/debugger/basic.spec.js @@ -0,0 +1,395 @@ +'use strict' + +const os = require('os') + +const { assert } = require('chai') +const { pollInterval, setup } = require('./utils') +const { assertObjectContains, assertUUID } = require('../helpers') +const { ACKNOWLEDGED, ERROR } = require('../../packages/dd-trace/src/appsec/remote_config/apply_states') +const { version } = require('../../package.json') + +describe('Dynamic Instrumentation', function () { + const t = setup() + + it('base case: target app should work as expected if no test probe has been added', async function () { + const response = await t.axios.get('/foo') + assert.strictEqual(response.status, 200) + assert.deepStrictEqual(response.data, { hello: 'foo' }) + }) + + describe('diagnostics messages', function () { + it('should send expected diagnostics messages if probe is received and triggered', function (done) { + let receivedAckUpdate = false + const probeId = t.rcConfig.config.id + const expectedPayloads = [{ + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'EMITTING' } } + }] + + t.agent.on('remote-config-ack-update', (id, version, state, error) => { + assert.strictEqual(id, t.rcConfig.id) + assert.strictEqual(version, 1) + assert.strictEqual(state, ACKNOWLEDGED) + assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail + + 
receivedAckUpdate = true + endIfDone() + }) + + t.agent.on('debugger-diagnostics', ({ payload }) => { + const expected = expectedPayloads.shift() + assertObjectContains(payload, expected) + assertUUID(payload.debugger.diagnostics.runtimeId) + + if (payload.debugger.diagnostics.status === 'INSTALLED') { + t.axios.get('/foo') + .then((response) => { + assert.strictEqual(response.status, 200) + assert.deepStrictEqual(response.data, { hello: 'foo' }) + }) + .catch(done) + } else { + endIfDone() + } + }) + + t.agent.addRemoteConfig(t.rcConfig) + + function endIfDone () { + if (receivedAckUpdate && expectedPayloads.length === 0) done() + } + }) + + it('should send expected diagnostics messages if probe is first received and then updated', function (done) { + let receivedAckUpdates = 0 + const probeId = t.rcConfig.config.id + const expectedPayloads = [{ + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 1, status: 'RECEIVED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 1, status: 'INSTALLED' } } + }] + const triggers = [ + () => { + t.rcConfig.config.version++ + t.agent.updateRemoteConfig(t.rcConfig.id, t.rcConfig.config) + }, + () => {} + ] + + t.agent.on('remote-config-ack-update', (id, version, state, error) => { + assert.strictEqual(id, t.rcConfig.id) + assert.strictEqual(version, ++receivedAckUpdates) + assert.strictEqual(state, ACKNOWLEDGED) + assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail + + endIfDone() + }) + + t.agent.on('debugger-diagnostics', ({ payload }) => { + const expected = expectedPayloads.shift() + assertObjectContains(payload, expected) + 
assertUUID(payload.debugger.diagnostics.runtimeId) + if (payload.debugger.diagnostics.status === 'INSTALLED') triggers.shift()() + endIfDone() + }) + + t.agent.addRemoteConfig(t.rcConfig) + + function endIfDone () { + if (receivedAckUpdates === 2 && expectedPayloads.length === 0) done() + } + }) + + it('should send expected diagnostics messages if probe is first received and then deleted', function (done) { + let receivedAckUpdate = false + let payloadsProcessed = false + const probeId = t.rcConfig.config.id + const expectedPayloads = [{ + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } + }] + + t.agent.on('remote-config-ack-update', (id, version, state, error) => { + assert.strictEqual(id, t.rcConfig.id) + assert.strictEqual(version, 1) + assert.strictEqual(state, ACKNOWLEDGED) + assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail + + receivedAckUpdate = true + endIfDone() + }) + + t.agent.on('debugger-diagnostics', ({ payload }) => { + const expected = expectedPayloads.shift() + assertObjectContains(payload, expected) + assertUUID(payload.debugger.diagnostics.runtimeId) + + if (payload.debugger.diagnostics.status === 'INSTALLED') { + t.agent.removeRemoteConfig(t.rcConfig.id) + // Wait a little to see if we get any follow-up `debugger-diagnostics` messages + setTimeout(() => { + payloadsProcessed = true + endIfDone() + }, pollInterval * 2 * 1000) // wait twice as long as the RC poll interval + } + }) + + t.agent.addRemoteConfig(t.rcConfig) + + function endIfDone () { + if (receivedAckUpdate && payloadsProcessed) done() + } + }) + + const unsupporedOrInvalidProbes = [[ + 'should send expected error diagnostics messages if probe doesn\'t conform to expected schema', + 'bad config!!!', + { status: 'ERROR' } + ], [ + 
'should send expected error diagnostics messages if probe type isn\'t supported', + t.generateProbeConfig({ type: 'INVALID_PROBE' }) + ], [ + 'should send expected error diagnostics messages if it isn\'t a line-probe', + t.generateProbeConfig({ where: { foo: 'bar' } }) // TODO: Use valid schema for method probe instead + ]] + + for (const [title, config, customErrorDiagnosticsObj] of unsupporedOrInvalidProbes) { + it(title, function (done) { + let receivedAckUpdate = false + + t.agent.on('remote-config-ack-update', (id, version, state, error) => { + assert.strictEqual(id, `logProbe_${config.id}`) + assert.strictEqual(version, 1) + assert.strictEqual(state, ERROR) + assert.strictEqual(error.slice(0, 6), 'Error:') + + receivedAckUpdate = true + endIfDone() + }) + + const probeId = config.id + const expectedPayloads = [{ + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: { status: 'RECEIVED' } } + }, { + ddsource: 'dd_debugger', + service: 'node', + debugger: { diagnostics: customErrorDiagnosticsObj ?? 
{ probeId, version: 0, status: 'ERROR' } } + }] + + t.agent.on('debugger-diagnostics', ({ payload }) => { + const expected = expectedPayloads.shift() + assertObjectContains(payload, expected) + const { diagnostics } = payload.debugger + assertUUID(diagnostics.runtimeId) + + if (diagnostics.status === 'ERROR') { + assert.property(diagnostics, 'exception') + assert.hasAllKeys(diagnostics.exception, ['message', 'stacktrace']) + assert.typeOf(diagnostics.exception.message, 'string') + assert.typeOf(diagnostics.exception.stacktrace, 'string') + } + + endIfDone() + }) + + t.agent.addRemoteConfig({ + product: 'LIVE_DEBUGGING', + id: `logProbe_${config.id}`, + config + }) + + function endIfDone () { + if (receivedAckUpdate && expectedPayloads.length === 0) done() + } + }) + } + }) + + describe('input messages', function () { + it('should capture and send expected payload when a log line probe is triggered', function (done) { + t.triggerBreakpoint() + + t.agent.on('debugger-input', ({ payload }) => { + const expected = { + ddsource: 'dd_debugger', + hostname: os.hostname(), + service: 'node', + message: 'Hello World!', + logger: { + name: t.breakpoint.file, + method: 'handler', + version, + thread_name: 'MainThread' + }, + 'debugger.snapshot': { + probe: { + id: t.rcConfig.config.id, + version: 0, + location: { file: t.breakpoint.file, lines: [String(t.breakpoint.line)] } + }, + language: 'javascript' + } + } + + assertObjectContains(payload, expected) + assert.match(payload.logger.thread_id, /^pid:\d+$/) + assertUUID(payload['debugger.snapshot'].id) + assert.isNumber(payload['debugger.snapshot'].timestamp) + assert.isTrue(payload['debugger.snapshot'].timestamp > Date.now() - 1000 * 60) + assert.isTrue(payload['debugger.snapshot'].timestamp <= Date.now()) + + assert.isArray(payload['debugger.snapshot'].stack) + assert.isAbove(payload['debugger.snapshot'].stack.length, 0) + for (const frame of payload['debugger.snapshot'].stack) { + assert.isObject(frame) + 
assert.hasAllKeys(frame, ['fileName', 'function', 'lineNumber', 'columnNumber']) + assert.isString(frame.fileName) + assert.isString(frame.function) + assert.isAbove(frame.lineNumber, 0) + assert.isAbove(frame.columnNumber, 0) + } + const topFrame = payload['debugger.snapshot'].stack[0] + // path seems to be prefixed with `/private` on Mac + assert.match(topFrame.fileName, new RegExp(`${t.appFile}$`)) + assert.strictEqual(topFrame.function, 'handler') + assert.strictEqual(topFrame.lineNumber, t.breakpoint.line) + assert.strictEqual(topFrame.columnNumber, 3) + + done() + }) + + t.agent.addRemoteConfig(t.rcConfig) + }) + + it('should respond with updated message if probe message is updated', function (done) { + const expectedMessages = ['Hello World!', 'Hello Updated World!'] + const triggers = [ + async () => { + await t.axios.get('/foo') + t.rcConfig.config.version++ + t.rcConfig.config.template = 'Hello Updated World!' + t.agent.updateRemoteConfig(t.rcConfig.id, t.rcConfig.config) + }, + async () => { + await t.axios.get('/foo') + } + ] + + t.agent.on('debugger-diagnostics', ({ payload }) => { + if (payload.debugger.diagnostics.status === 'INSTALLED') triggers.shift()().catch(done) + }) + + t.agent.on('debugger-input', ({ payload }) => { + assert.strictEqual(payload.message, expectedMessages.shift()) + if (expectedMessages.length === 0) done() + }) + + t.agent.addRemoteConfig(t.rcConfig) + }) + + it('should not trigger if probe is deleted', function (done) { + t.agent.on('debugger-diagnostics', async ({ payload }) => { + try { + if (payload.debugger.diagnostics.status === 'INSTALLED') { + t.agent.once('remote-confg-responded', async () => { + try { + await t.axios.get('/foo') + // We want to wait enough time to see if the client triggers on the breakpoint so that the test can fail + // if it does, but not so long that the test times out. + // TODO: Is there some signal we can use instead of a timer?
+ setTimeout(done, pollInterval * 2 * 1000) // wait twice as long as the RC poll interval + } catch (err) { + // Necessary hack: Any errors thrown inside of an async function are invisible to Mocha unless the outer + // `it` callback is also `async` (which we can't do in this case since we rely on the `done` callback). + done(err) + } + }) + + t.agent.removeRemoteConfig(t.rcConfig.id) + } + } catch (err) { + // Necessary hack: Any errors thrown inside of an async function are invisible to Mocha unless the outer `it` + // callback is also `async` (which we can't do in this case since we rely on the `done` callback). + done(err) + } + }) + + t.agent.on('debugger-input', () => { + assert.fail('should not capture anything when the probe is deleted') + }) + + t.agent.addRemoteConfig(t.rcConfig) + }) + }) + + describe('race conditions', function () { + it('should remove the last breakpoint completely before trying to add a new one', function (done) { + const rcConfig2 = t.generateRemoteConfig() + + t.agent.on('debugger-diagnostics', ({ payload: { debugger: { diagnostics: { status, probeId } } } }) => { + if (status !== 'INSTALLED') return + + if (probeId === t.rcConfig.config.id) { + // First INSTALLED payload: Try to trigger the race condition. + t.agent.removeRemoteConfig(t.rcConfig.id) + t.agent.addRemoteConfig(rcConfig2) + } else { + // Second INSTALLED payload: Perform an HTTP request to see if we successfully handled the race condition. + let finished = false + + // If the race condition occurred, the debugger will have been detached from the main thread and the new + // probe will never trigger.
If that's the case, the following timer will fire: + const timer = setTimeout(() => { + done(new Error('Race condition occurred!')) + }, 1000) + + // If we successfully handled the race condition, the probe will trigger, we'll get a probe result and the + // following event listener will be called: + t.agent.once('debugger-input', () => { + clearTimeout(timer) + finished = true + done() + }) + + // Perform HTTP request to try and trigger the probe + t.axios.get('/foo').catch((err) => { + // If the request hasn't fully completed by the time the test ends and the target app is destroyed, Axios + // will complain with a "socket hang up" error. Hence this sanity check before calling `done(err)`. If we + // later add more tests below this one, this shouldn't be an issue. + if (!finished) done(err) + }) + } + }) + + t.agent.addRemoteConfig(t.rcConfig) + }) + }) +}) diff --git a/integration-tests/debugger/index.spec.js b/integration-tests/debugger/index.spec.js deleted file mode 100644 index 8670ba82b47..00000000000 --- a/integration-tests/debugger/index.spec.js +++ /dev/null @@ -1,654 +0,0 @@ -'use strict' - -const path = require('path') -const { randomUUID } = require('crypto') -const os = require('os') - -const getPort = require('get-port') -const Axios = require('axios') -const { assert } = require('chai') -const { assertObjectContains, assertUUID, createSandbox, FakeAgent, spawnProc } = require('../helpers') -const { ACKNOWLEDGED, ERROR } = require('../../packages/dd-trace/src/appsec/remote_config/apply_states') -const { version } = require('../../package.json') - -const probeFile = 'debugger/target-app/index.js' -const probeLineNo = 14 -const pollInterval = 1 - -describe('Dynamic Instrumentation', function () { - let axios, sandbox, cwd, appPort, appFile, agent, proc, rcConfig - - before(async function () { - sandbox = await createSandbox(['fastify']) - cwd = sandbox.folder - appFile = path.join(cwd, ...probeFile.split('/')) - }) - - after(async function () { -
await sandbox.remove() - }) - - beforeEach(async function () { - rcConfig = generateRemoteConfig() - appPort = await getPort() - agent = await new FakeAgent().start() - proc = await spawnProc(appFile, { - cwd, - env: { - APP_PORT: appPort, - DD_DYNAMIC_INSTRUMENTATION_ENABLED: true, - DD_TRACE_AGENT_PORT: agent.port, - DD_TRACE_DEBUG: process.env.DD_TRACE_DEBUG, // inherit to make debugging the sandbox easier - DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS: pollInterval - } - }) - axios = Axios.create({ - baseURL: `http://localhost:${appPort}` - }) - }) - - afterEach(async function () { - proc.kill() - await agent.stop() - }) - - it('base case: target app should work as expected if no test probe has been added', async function () { - const response = await axios.get('/foo') - assert.strictEqual(response.status, 200) - assert.deepStrictEqual(response.data, { hello: 'foo' }) - }) - - describe('diagnostics messages', function () { - it('should send expected diagnostics messages if probe is received and triggered', function (done) { - let receivedAckUpdate = false - const probeId = rcConfig.config.id - const expectedPayloads = [{ - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'EMITTING' } } - }] - - agent.on('remote-config-ack-update', (id, version, state, error) => { - assert.strictEqual(id, rcConfig.id) - assert.strictEqual(version, 1) - assert.strictEqual(state, ACKNOWLEDGED) - assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail - - receivedAckUpdate = true - endIfDone() - }) - - agent.on('debugger-diagnostics', ({ payload }) => { - const expected = expectedPayloads.shift() - assertObjectContains(payload, 
expected) - assertUUID(payload.debugger.diagnostics.runtimeId) - - if (payload.debugger.diagnostics.status === 'INSTALLED') { - axios.get('/foo') - .then((response) => { - assert.strictEqual(response.status, 200) - assert.deepStrictEqual(response.data, { hello: 'foo' }) - }) - .catch(done) - } else { - endIfDone() - } - }) - - agent.addRemoteConfig(rcConfig) - - function endIfDone () { - if (receivedAckUpdate && expectedPayloads.length === 0) done() - } - }) - - it('should send expected diagnostics messages if probe is first received and then updated', function (done) { - let receivedAckUpdates = 0 - const probeId = rcConfig.config.id - const expectedPayloads = [{ - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 1, status: 'RECEIVED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 1, status: 'INSTALLED' } } - }] - const triggers = [ - () => { - rcConfig.config.version++ - agent.updateRemoteConfig(rcConfig.id, rcConfig.config) - }, - () => {} - ] - - agent.on('remote-config-ack-update', (id, version, state, error) => { - assert.strictEqual(id, rcConfig.id) - assert.strictEqual(version, ++receivedAckUpdates) - assert.strictEqual(state, ACKNOWLEDGED) - assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail - - endIfDone() - }) - - agent.on('debugger-diagnostics', ({ payload }) => { - const expected = expectedPayloads.shift() - assertObjectContains(payload, expected) - assertUUID(payload.debugger.diagnostics.runtimeId) - if (payload.debugger.diagnostics.status === 'INSTALLED') triggers.shift()() - endIfDone() - }) - - agent.addRemoteConfig(rcConfig) - - function 
endIfDone () { - if (receivedAckUpdates === 2 && expectedPayloads.length === 0) done() - } - }) - - it('should send expected diagnostics messages if probe is first received and then deleted', function (done) { - let receivedAckUpdate = false - let payloadsProcessed = false - const probeId = rcConfig.config.id - const expectedPayloads = [{ - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'RECEIVED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { probeId, version: 0, status: 'INSTALLED' } } - }] - - agent.on('remote-config-ack-update', (id, version, state, error) => { - assert.strictEqual(id, rcConfig.id) - assert.strictEqual(version, 1) - assert.strictEqual(state, ACKNOWLEDGED) - assert.notOk(error) // falsy check since error will be an empty string, but that's an implementation detail - - receivedAckUpdate = true - endIfDone() - }) - - agent.on('debugger-diagnostics', ({ payload }) => { - const expected = expectedPayloads.shift() - assertObjectContains(payload, expected) - assertUUID(payload.debugger.diagnostics.runtimeId) - - if (payload.debugger.diagnostics.status === 'INSTALLED') { - agent.removeRemoteConfig(rcConfig.id) - // Wait a little to see if we get any follow-up `debugger-diagnostics` messages - setTimeout(() => { - payloadsProcessed = true - endIfDone() - }, pollInterval * 2 * 1000) // wait twice as long as the RC poll interval - } - }) - - agent.addRemoteConfig(rcConfig) - - function endIfDone () { - if (receivedAckUpdate && payloadsProcessed) done() - } - }) - - const unsupporedOrInvalidProbes = [[ - 'should send expected error diagnostics messages if probe doesn\'t conform to expected schema', - 'bad config!!!', - { status: 'ERROR' } - ], [ - 'should send expected error diagnostics messages if probe type isn\'t supported', - generateProbeConfig({ type: 'INVALID_PROBE' }) - ], [ - 'should send expected error diagnostics messages if it isn\'t a line-probe', - 
generateProbeConfig({ where: { foo: 'bar' } }) // TODO: Use valid schema for method probe instead - ]] - - for (const [title, config, customErrorDiagnosticsObj] of unsupporedOrInvalidProbes) { - it(title, function (done) { - let receivedAckUpdate = false - - agent.on('remote-config-ack-update', (id, version, state, error) => { - assert.strictEqual(id, `logProbe_${config.id}`) - assert.strictEqual(version, 1) - assert.strictEqual(state, ERROR) - assert.strictEqual(error.slice(0, 6), 'Error:') - - receivedAckUpdate = true - endIfDone() - }) - - const probeId = config.id - const expectedPayloads = [{ - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: { status: 'RECEIVED' } } - }, { - ddsource: 'dd_debugger', - service: 'node', - debugger: { diagnostics: customErrorDiagnosticsObj ?? { probeId, version: 0, status: 'ERROR' } } - }] - - agent.on('debugger-diagnostics', ({ payload }) => { - const expected = expectedPayloads.shift() - assertObjectContains(payload, expected) - const { diagnostics } = payload.debugger - assertUUID(diagnostics.runtimeId) - - if (diagnostics.status === 'ERROR') { - assert.property(diagnostics, 'exception') - assert.hasAllKeys(diagnostics.exception, ['message', 'stacktrace']) - assert.typeOf(diagnostics.exception.message, 'string') - assert.typeOf(diagnostics.exception.stacktrace, 'string') - } - - endIfDone() - }) - - agent.addRemoteConfig({ - product: 'LIVE_DEBUGGING', - id: `logProbe_${config.id}`, - config - }) - - function endIfDone () { - if (receivedAckUpdate && expectedPayloads.length === 0) done() - } - }) - } - }) - - describe('input messages', function () { - it('should capture and send expected payload when a log line probe is triggered', function (done) { - agent.on('debugger-diagnostics', ({ payload }) => { - if (payload.debugger.diagnostics.status === 'INSTALLED') { - axios.get('/foo') - } - }) - - agent.on('debugger-input', ({ payload }) => { - const expected = { - ddsource: 'dd_debugger', - hostname: 
os.hostname(), - service: 'node', - message: 'Hello World!', - logger: { - name: 'debugger/target-app/index.js', - method: 'handler', - version, - thread_name: 'MainThread' - }, - 'debugger.snapshot': { - probe: { - id: rcConfig.config.id, - version: 0, - location: { file: probeFile, lines: [String(probeLineNo)] } - }, - language: 'javascript' - } - } - - assertObjectContains(payload, expected) - assert.match(payload.logger.thread_id, /^pid:\d+$/) - assertUUID(payload['debugger.snapshot'].id) - assert.isNumber(payload['debugger.snapshot'].timestamp) - assert.isTrue(payload['debugger.snapshot'].timestamp > Date.now() - 1000 * 60) - assert.isTrue(payload['debugger.snapshot'].timestamp <= Date.now()) - - assert.isArray(payload['debugger.snapshot'].stack) - assert.isAbove(payload['debugger.snapshot'].stack.length, 0) - for (const frame of payload['debugger.snapshot'].stack) { - assert.isObject(frame) - assert.hasAllKeys(frame, ['fileName', 'function', 'lineNumber', 'columnNumber']) - assert.isString(frame.fileName) - assert.isString(frame.function) - assert.isAbove(frame.lineNumber, 0) - assert.isAbove(frame.columnNumber, 0) - } - const topFrame = payload['debugger.snapshot'].stack[0] - assert.match(topFrame.fileName, new RegExp(`${appFile}$`)) // path seems to be prefeixed with `/private` on Mac - assert.strictEqual(topFrame.function, 'handler') - assert.strictEqual(topFrame.lineNumber, probeLineNo) - assert.strictEqual(topFrame.columnNumber, 3) - - done() - }) - - agent.addRemoteConfig(rcConfig) - }) - - it('should respond with updated message if probe message is updated', function (done) { - const expectedMessages = ['Hello World!', 'Hello Updated World!'] - const triggers = [ - async () => { - await axios.get('/foo') - rcConfig.config.version++ - rcConfig.config.template = 'Hello Updated World!' 
- agent.updateRemoteConfig(rcConfig.id, rcConfig.config) - }, - async () => { - await axios.get('/foo') - } - ] - - agent.on('debugger-diagnostics', ({ payload }) => { - if (payload.debugger.diagnostics.status === 'INSTALLED') triggers.shift()().catch(done) - }) - - agent.on('debugger-input', ({ payload }) => { - assert.strictEqual(payload.message, expectedMessages.shift()) - if (expectedMessages.length === 0) done() - }) - - agent.addRemoteConfig(rcConfig) - }) - - it('should not trigger if probe is deleted', function (done) { - agent.on('debugger-diagnostics', async ({ payload }) => { - try { - if (payload.debugger.diagnostics.status === 'INSTALLED') { - agent.once('remote-confg-responded', async () => { - try { - await axios.get('/foo') - // We want to wait enough time to see if the client triggers on the breakpoint so that the test can fail - // if it does, but not so long that the test times out. - // TODO: Is there some signal we can use instead of a timer? - setTimeout(done, pollInterval * 2 * 1000) // wait twice as long as the RC poll interval - } catch (err) { - // Nessecary hack: Any errors thrown inside of an async function is invisible to Mocha unless the outer - // `it` callback is also `async` (which we can't do in this case since we rely on the `done` callback). - done(err) - } - }) - - agent.removeRemoteConfig(rcConfig.id) - } - } catch (err) { - // Nessecary hack: Any errors thrown inside of an async function is invisible to Mocha unless the outer `it` - // callback is also `async` (which we can't do in this case since we rely on the `done` callback). 
- done(err) - } - }) - - agent.on('debugger-input', () => { - assert.fail('should not capture anything when the probe is deleted') - }) - - agent.addRemoteConfig(rcConfig) - }) - - describe('with snapshot', () => { - beforeEach(() => { - // Trigger the breakpoint once probe is successfully installed - agent.on('debugger-diagnostics', ({ payload }) => { - if (payload.debugger.diagnostics.status === 'INSTALLED') { - axios.get('/foo') - } - }) - }) - - it('should capture a snapshot', (done) => { - agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { - assert.deepEqual(Object.keys(captures), ['lines']) - assert.deepEqual(Object.keys(captures.lines), [String(probeLineNo)]) - - const { locals } = captures.lines[probeLineNo] - const { request, fastify, getSomeData } = locals - delete locals.request - delete locals.fastify - delete locals.getSomeData - - // from block scope - assert.deepEqual(locals, { - nil: { type: 'null', isNull: true }, - undef: { type: 'undefined' }, - bool: { type: 'boolean', value: 'true' }, - num: { type: 'number', value: '42' }, - bigint: { type: 'bigint', value: '42' }, - str: { type: 'string', value: 'foo' }, - lstr: { - type: 'string', - // eslint-disable-next-line max-len - value: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor i', - truncated: true, - size: 445 - }, - sym: { type: 'symbol', value: 'Symbol(foo)' }, - regex: { type: 'RegExp', value: '/bar/i' }, - arr: { - type: 'Array', - elements: [ - { type: 'number', value: '1' }, - { type: 'number', value: '2' }, - { type: 'number', value: '3' }, - { type: 'number', value: '4' }, - { type: 'number', value: '5' } - ] - }, - obj: { - type: 'Object', - fields: { - foo: { - type: 'Object', - fields: { - baz: { type: 'number', value: '42' }, - nil: { type: 'null', isNull: true }, - undef: { type: 'undefined' }, - deep: { - type: 'Object', - fields: { nested: { type: 'Object', notCapturedReason: 'depth' } } - } - } - }, - bar: { type: 'boolean', value: 'true' } - } - }, - emptyObj: { type: 'Object', fields: {} }, - fn: { - type: 'Function', - fields: { - length: { type: 'number', value: '0' }, - name: { type: 'string', value: 'fn' } - } - }, - p: { - type: 'Promise', - fields: { - '[[PromiseState]]': { type: 'string', value: 'fulfilled' }, - '[[PromiseResult]]': { type: 'undefined' } - } - } - }) - - // from local scope - // There's no reason to test the `request` object 100%, instead just check its fingerprint - assert.deepEqual(Object.keys(request), ['type', 'fields']) - assert.equal(request.type, 'Request') - assert.deepEqual(request.fields.id, { type: 'string', value: 'req-1' }) - assert.deepEqual(request.fields.params, { - type: 'NullObject', fields: { name: { type: 'string', value: 'foo' } } - }) - assert.deepEqual(request.fields.query, { type: 'Object', fields: {} }) - assert.deepEqual(request.fields.body, { type: 'undefined' }) - - // from closure scope - // There's no reason to test the `fastify` object 100%, instead just check its fingerprint - assert.deepEqual(Object.keys(fastify), ['type', 'fields']) - assert.equal(fastify.type, 'Object') - - assert.deepEqual(getSomeData, { - type: 'Function', - fields: { - length: { type: 'number', value: '0' }, - name: { type: 'string', value: 'getSomeData' } - } - }) - - 
done() - }) - - agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true })) - }) - - it('should respect maxReferenceDepth', (done) => { - agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { - const { locals } = captures.lines[probeLineNo] - delete locals.request - delete locals.fastify - delete locals.getSomeData - - assert.deepEqual(locals, { - nil: { type: 'null', isNull: true }, - undef: { type: 'undefined' }, - bool: { type: 'boolean', value: 'true' }, - num: { type: 'number', value: '42' }, - bigint: { type: 'bigint', value: '42' }, - str: { type: 'string', value: 'foo' }, - lstr: { - type: 'string', - // eslint-disable-next-line max-len - value: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor i', - truncated: true, - size: 445 - }, - sym: { type: 'symbol', value: 'Symbol(foo)' }, - regex: { type: 'RegExp', value: '/bar/i' }, - arr: { type: 'Array', notCapturedReason: 'depth' }, - obj: { type: 'Object', notCapturedReason: 'depth' }, - emptyObj: { type: 'Object', notCapturedReason: 'depth' }, - fn: { type: 'Function', notCapturedReason: 'depth' }, - p: { type: 'Promise', notCapturedReason: 'depth' } - }) - - done() - }) - - agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true, capture: { maxReferenceDepth: 0 } })) - }) - - it('should respect maxLength', (done) => { - agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { - const { locals } = captures.lines[probeLineNo] - - assert.deepEqual(locals.lstr, { - type: 'string', - value: 'Lorem ipsu', - truncated: true, - size: 445 - }) - - done() - }) - - agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true, capture: { maxLength: 10 } })) - }) - - it('should respect maxCollectionSize', (done) => { - 
agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { - const { locals } = captures.lines[probeLineNo] - - assert.deepEqual(locals.arr, { - type: 'Array', - elements: [ - { type: 'number', value: '1' }, - { type: 'number', value: '2' }, - { type: 'number', value: '3' } - ], - notCapturedReason: 'collectionSize', - size: 5 - }) - - done() - }) - - agent.addRemoteConfig(generateRemoteConfig({ captureSnapshot: true, capture: { maxCollectionSize: 3 } })) - }) - }) - }) - - describe('race conditions', () => { - it('should remove the last breakpoint completely before trying to add a new one', (done) => { - const rcConfig2 = generateRemoteConfig() - - agent.on('debugger-diagnostics', ({ payload: { debugger: { diagnostics: { status, probeId } } } }) => { - if (status !== 'INSTALLED') return - - if (probeId === rcConfig.config.id) { - // First INSTALLED payload: Try to trigger the race condition. - agent.removeRemoteConfig(rcConfig.id) - agent.addRemoteConfig(rcConfig2) - } else { - // Second INSTALLED payload: Perform an HTTP request to see if we successfully handled the race condition. - let finished = false - - // If the race condition occurred, the debugger will have been detached from the main thread and the new - // probe will never trigger. If that's the case, the following timer will fire: - const timer = setTimeout(() => { - done(new Error('Race condition occurred!')) - }, 1000) - - // If we successfully handled the race condition, the probe will trigger, we'll get a probe result and the - // following event listener will be called: - agent.once('debugger-input', () => { - clearTimeout(timer) - finished = true - done() - }) - - // Perform HTTP request to try and trigger the probe - axios.get('/foo').catch((err) => { - // If the request hasn't fully completed by the time the tests ends and the target app is destroyed, Axios - // will complain with a "socket hang up" error. Hence this sanity check before calling `done(err)`. 
If we - // later add more tests below this one, this shouuldn't be an issue. - if (!finished) done(err) - }) - } - }) - - agent.addRemoteConfig(rcConfig) - }) - }) -}) - -function generateRemoteConfig (overrides = {}) { - overrides.id = overrides.id || randomUUID() - return { - product: 'LIVE_DEBUGGING', - id: `logProbe_${overrides.id}`, - config: generateProbeConfig(overrides) - } -} - -function generateProbeConfig (overrides = {}) { - overrides.capture = { maxReferenceDepth: 3, ...overrides.capture } - overrides.sampling = { snapshotsPerSecond: 5000, ...overrides.sampling } - return { - id: randomUUID(), - version: 0, - type: 'LOG_PROBE', - language: 'javascript', - where: { sourceFile: probeFile, lines: [String(probeLineNo)] }, - tags: [], - template: 'Hello World!', - segments: [{ str: 'Hello World!' }], - captureSnapshot: false, - evaluateAt: 'EXIT', - ...overrides - } -} diff --git a/integration-tests/debugger/snapshot-pruning.spec.js b/integration-tests/debugger/snapshot-pruning.spec.js new file mode 100644 index 00000000000..91190a1c25d --- /dev/null +++ b/integration-tests/debugger/snapshot-pruning.spec.js @@ -0,0 +1,43 @@ +'use strict' + +const { assert } = require('chai') +const { setup, getBreakpointInfo } = require('./utils') + +const { line } = getBreakpointInfo() + +describe('Dynamic Instrumentation', function () { + const t = setup() + + describe('input messages', function () { + describe('with snapshot', function () { + beforeEach(t.triggerBreakpoint) + + it('should prune snapshot if payload is too large', function (done) { + t.agent.on('debugger-input', ({ payload }) => { + assert.isBelow(Buffer.byteLength(JSON.stringify(payload)), 1024 * 1024) // 1MB + assert.deepEqual(payload['debugger.snapshot'].captures, { + lines: { + [line]: { + locals: { + notCapturedReason: 'Snapshot was too large', + size: 6 + } + } + } + }) + done() + }) + + t.agent.addRemoteConfig(t.generateRemoteConfig({ + captureSnapshot: true, + capture: { + // ensure we get a large 
snapshot + maxCollectionSize: Number.MAX_SAFE_INTEGER, + maxFieldCount: Number.MAX_SAFE_INTEGER, + maxLength: Number.MAX_SAFE_INTEGER + } + })) + }) + }) + }) +}) diff --git a/integration-tests/debugger/snapshot.spec.js b/integration-tests/debugger/snapshot.spec.js new file mode 100644 index 00000000000..94ef323f6a7 --- /dev/null +++ b/integration-tests/debugger/snapshot.spec.js @@ -0,0 +1,239 @@ +'use strict' + +const { assert } = require('chai') +const { setup } = require('./utils') + +describe('Dynamic Instrumentation', function () { + const t = setup() + + describe('input messages', function () { + describe('with snapshot', function () { + beforeEach(t.triggerBreakpoint) + + it('should capture a snapshot', function (done) { + t.agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { + assert.deepEqual(Object.keys(captures), ['lines']) + assert.deepEqual(Object.keys(captures.lines), [String(t.breakpoint.line)]) + + const { locals } = captures.lines[t.breakpoint.line] + const { request, fastify, getSomeData } = locals + delete locals.request + delete locals.fastify + delete locals.getSomeData + + // from block scope + assert.deepEqual(locals, { + nil: { type: 'null', isNull: true }, + undef: { type: 'undefined' }, + bool: { type: 'boolean', value: 'true' }, + num: { type: 'number', value: '42' }, + bigint: { type: 'bigint', value: '42' }, + str: { type: 'string', value: 'foo' }, + lstr: { + type: 'string', + // eslint-disable-next-line max-len + value: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor i', + truncated: true, + size: 445 + }, + sym: { type: 'symbol', value: 'Symbol(foo)' }, + regex: { type: 'RegExp', value: '/bar/i' }, + arr: { + type: 'Array', + elements: [ + { type: 'number', value: '1' }, + { type: 'number', value: '2' }, + { type: 'number', value: '3' }, + { type: 'number', value: '4' }, + { type: 'number', value: '5' } + ] + }, + obj: { + type: 'Object', + fields: { + foo: { + type: 'Object', + fields: { + baz: { type: 'number', value: '42' }, + nil: { type: 'null', isNull: true }, + undef: { type: 'undefined' }, + deep: { + type: 'Object', + fields: { nested: { type: 'Object', notCapturedReason: 'depth' } } + } + } + }, + bar: { type: 'boolean', value: 'true' } + } + }, + emptyObj: { type: 'Object', fields: {} }, + fn: { + type: 'Function', + fields: { + length: { type: 'number', value: '0' }, + name: { type: 'string', value: 'fn' } + } + }, + p: { + type: 'Promise', + fields: { + '[[PromiseState]]': { type: 'string', value: 'fulfilled' }, + '[[PromiseResult]]': { type: 'undefined' } + } + } + }) + + // from local scope + // There's no reason to test the `request` object 100%, instead just check its fingerprint + assert.deepEqual(Object.keys(request), ['type', 'fields']) + assert.equal(request.type, 'Request') + assert.deepEqual(request.fields.id, { type: 'string', value: 'req-1' }) + assert.deepEqual(request.fields.params, { + type: 'NullObject', fields: { name: { type: 'string', value: 'foo' } } + }) + assert.deepEqual(request.fields.query, { type: 'Object', fields: {} }) + assert.deepEqual(request.fields.body, { type: 'undefined' }) + + // from closure scope + // There's no reason to test the `fastify` object 100%, instead just check its fingerprint + assert.equal(fastify.type, 'Object') + assert.typeOf(fastify.fields, 'Object') + + assert.deepEqual(getSomeData, { + type: 'Function', + fields: { + length: { type: 'number', value: '0' }, + name: { type: 'string', value: 'getSomeData' } + } + }) + + done() + }) + + 
t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true })) + }) + + it('should respect maxReferenceDepth', function (done) { + t.agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { + const { locals } = captures.lines[t.breakpoint.line] + delete locals.request + delete locals.fastify + delete locals.getSomeData + + assert.deepEqual(locals, { + nil: { type: 'null', isNull: true }, + undef: { type: 'undefined' }, + bool: { type: 'boolean', value: 'true' }, + num: { type: 'number', value: '42' }, + bigint: { type: 'bigint', value: '42' }, + str: { type: 'string', value: 'foo' }, + lstr: { + type: 'string', + // eslint-disable-next-line max-len + value: 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor i', + truncated: true, + size: 445 + }, + sym: { type: 'symbol', value: 'Symbol(foo)' }, + regex: { type: 'RegExp', value: '/bar/i' }, + arr: { type: 'Array', notCapturedReason: 'depth' }, + obj: { type: 'Object', notCapturedReason: 'depth' }, + emptyObj: { type: 'Object', notCapturedReason: 'depth' }, + fn: { type: 'Function', notCapturedReason: 'depth' }, + p: { type: 'Promise', notCapturedReason: 'depth' } + }) + + done() + }) + + t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true, capture: { maxReferenceDepth: 0 } })) + }) + + it('should respect maxLength', function (done) { + t.agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { + const { locals } = captures.lines[t.breakpoint.line] + + assert.deepEqual(locals.lstr, { + type: 'string', + value: 'Lorem ipsu', + truncated: true, + size: 445 + }) + + done() + }) + + t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true, capture: { maxLength: 10 } })) + }) + + it('should respect maxCollectionSize', 
function (done) { + t.agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { + const { locals } = captures.lines[t.breakpoint.line] + + assert.deepEqual(locals.arr, { + type: 'Array', + elements: [ + { type: 'number', value: '1' }, + { type: 'number', value: '2' }, + { type: 'number', value: '3' } + ], + notCapturedReason: 'collectionSize', + size: 5 + }) + + done() + }) + + t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true, capture: { maxCollectionSize: 3 } })) + }) + + it('should respect maxFieldCount', (done) => { + const maxFieldCount = 3 + + function assertMaxFieldCount (prop) { + if ('fields' in prop) { + if (prop.notCapturedReason === 'fieldCount') { + assert.strictEqual(Object.keys(prop.fields).length, maxFieldCount) + assert.isAbove(prop.size, maxFieldCount) + } else { + assert.isBelow(Object.keys(prop.fields).length, maxFieldCount) + } + } + + for (const value of Object.values(prop.fields || prop.elements || prop.entries || {})) { + assertMaxFieldCount(value) + } + } + + t.agent.on('debugger-input', ({ payload: { 'debugger.snapshot': { captures } } }) => { + const { locals } = captures.lines[t.breakpoint.line] + + assert.deepEqual(Object.keys(locals), [ + // Up to 3 properties from the local scope + 'request', 'nil', 'undef', + // Up to 3 properties from the closure scope + 'fastify', 'getSomeData' + ]) + + assert.strictEqual(locals.request.type, 'Request') + assert.strictEqual(Object.keys(locals.request.fields).length, maxFieldCount) + assert.strictEqual(locals.request.notCapturedReason, 'fieldCount') + assert.isAbove(locals.request.size, maxFieldCount) + + assert.strictEqual(locals.fastify.type, 'Object') + assert.strictEqual(Object.keys(locals.fastify.fields).length, maxFieldCount) + assert.strictEqual(locals.fastify.notCapturedReason, 'fieldCount') + assert.isAbove(locals.fastify.size, maxFieldCount) + + for (const value of Object.values(locals)) { + assertMaxFieldCount(value) + } + + done() + }) + + 
t.agent.addRemoteConfig(t.generateRemoteConfig({ captureSnapshot: true, capture: { maxFieldCount } })) + }) + }) + }) +}) diff --git a/integration-tests/debugger/target-app/basic.js b/integration-tests/debugger/target-app/basic.js new file mode 100644 index 00000000000..f8330012278 --- /dev/null +++ b/integration-tests/debugger/target-app/basic.js @@ -0,0 +1,18 @@ +'use strict' + +require('dd-trace/init') +const Fastify = require('fastify') + +const fastify = Fastify() + +fastify.get('/:name', function handler (request) { + return { hello: request.params.name } // BREAKPOINT +}) + +fastify.listen({ port: process.env.APP_PORT }, (err) => { + if (err) { + fastify.log.error(err) + process.exit(1) + } + process.send({ port: process.env.APP_PORT }) +}) diff --git a/integration-tests/debugger/target-app/snapshot-pruning.js b/integration-tests/debugger/target-app/snapshot-pruning.js new file mode 100644 index 00000000000..58752006192 --- /dev/null +++ b/integration-tests/debugger/target-app/snapshot-pruning.js @@ -0,0 +1,41 @@ +'use strict' + +require('dd-trace/init') + +const { randomBytes } = require('crypto') +const Fastify = require('fastify') + +const fastify = Fastify() + +const TARGET_SIZE = 1024 * 1024 // 1MB +const LARGE_STRING = randomBytes(1024).toString('hex') + +fastify.get('/:name', function handler (request) { + // eslint-disable-next-line no-unused-vars + const obj = generateObjectWithJSONSizeLargerThan1MB() + + return { hello: request.params.name } // BREAKPOINT +}) + +fastify.listen({ port: process.env.APP_PORT }, (err) => { + if (err) { + fastify.log.error(err) + process.exit(1) + } + process.send({ port: process.env.APP_PORT }) +}) + +function generateObjectWithJSONSizeLargerThan1MB () { + const obj = {} + let i = 0 + + while (++i) { + if (i % 100 === 0) { + const size = JSON.stringify(obj).length + if (size > TARGET_SIZE) break + } + obj[i] = LARGE_STRING + } + + return obj +} diff --git a/integration-tests/debugger/target-app/index.js 
b/integration-tests/debugger/target-app/snapshot.js similarity index 92% rename from integration-tests/debugger/target-app/index.js rename to integration-tests/debugger/target-app/snapshot.js index 75b8f551a7a..a7b1810c10b 100644 --- a/integration-tests/debugger/target-app/index.js +++ b/integration-tests/debugger/target-app/snapshot.js @@ -11,11 +11,9 @@ const fastify = Fastify() fastify.get('/:name', function handler (request) { // eslint-disable-next-line no-unused-vars const { nil, undef, bool, num, bigint, str, lstr, sym, regex, arr, obj, emptyObj, fn, p } = getSomeData() - return { hello: request.params.name } + return { hello: request.params.name } // BREAKPOINT }) -// WARNING: Breakpoints present above this line - Any changes to the lines above might influence tests! - fastify.listen({ port: process.env.APP_PORT }, (err) => { if (err) { fastify.log.error(err) diff --git a/integration-tests/debugger/utils.js b/integration-tests/debugger/utils.js new file mode 100644 index 00000000000..c5760a0e9d4 --- /dev/null +++ b/integration-tests/debugger/utils.js @@ -0,0 +1,124 @@ +'use strict' + +const { basename, join } = require('path') +const { readFileSync } = require('fs') +const { randomUUID } = require('crypto') + +const getPort = require('get-port') +const Axios = require('axios') + +const { createSandbox, FakeAgent, spawnProc } = require('../helpers') + +const pollInterval = 1 + +module.exports = { + pollInterval, + setup, + getBreakpointInfo +} + +function setup () { + let sandbox, cwd, appPort, proc + const breakpoint = getBreakpointInfo(1) // `1` to disregard the `setup` function + const t = { + breakpoint, + axios: null, + appFile: null, + agent: null, + rcConfig: null, + triggerBreakpoint, + generateRemoteConfig, + generateProbeConfig + } + + function triggerBreakpoint () { + // Trigger the breakpoint once probe is successfully installed + t.agent.on('debugger-diagnostics', ({ payload }) => { + if (payload.debugger.diagnostics.status === 'INSTALLED') { + 
t.axios.get('/foo') + } + }) + } + + function generateRemoteConfig (overrides = {}) { + overrides.id = overrides.id || randomUUID() + return { + product: 'LIVE_DEBUGGING', + id: `logProbe_${overrides.id}`, + config: generateProbeConfig(overrides) + } + } + + function generateProbeConfig (overrides = {}) { + overrides.capture = { maxReferenceDepth: 3, ...overrides.capture } + overrides.sampling = { snapshotsPerSecond: 5000, ...overrides.sampling } + return { + id: randomUUID(), + version: 0, + type: 'LOG_PROBE', + language: 'javascript', + where: { sourceFile: breakpoint.file, lines: [String(breakpoint.line)] }, + tags: [], + template: 'Hello World!', + segments: [{ str: 'Hello World!' }], + captureSnapshot: false, + evaluateAt: 'EXIT', + ...overrides + } + } + + before(async function () { + sandbox = await createSandbox(['fastify']) + cwd = sandbox.folder + t.appFile = join(cwd, ...breakpoint.file.split('/')) + }) + + after(async function () { + await sandbox.remove() + }) + + beforeEach(async function () { + t.rcConfig = generateRemoteConfig(breakpoint) + appPort = await getPort() + t.agent = await new FakeAgent().start() + proc = await spawnProc(t.appFile, { + cwd, + env: { + APP_PORT: appPort, + DD_DYNAMIC_INSTRUMENTATION_ENABLED: true, + DD_TRACE_AGENT_PORT: t.agent.port, + DD_TRACE_DEBUG: process.env.DD_TRACE_DEBUG, // inherit to make debugging the sandbox easier + DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS: pollInterval + } + }) + t.axios = Axios.create({ + baseURL: `http://localhost:${appPort}` + }) + }) + + afterEach(async function () { + proc.kill() + await t.agent.stop() + }) + + return t +} + +function getBreakpointInfo (stackIndex = 0) { + // First, get the filename of file that called this function + const testFile = new Error().stack + .split('\n')[stackIndex + 2] // +2 to skip this function + the first line, which is the error message + .split(' (')[1] + .slice(0, -1) + .split(':')[0] + + // Then, find the corresponding file in which the breakpoint 
exists + const filename = basename(testFile).replace('.spec', '') + + // Finally, find the line number of the breakpoint + const line = readFileSync(join(__dirname, 'target-app', filename), 'utf8') + .split('\n') + .findIndex(line => line.includes('// BREAKPOINT')) + 1 + + return { file: `debugger/target-app/${filename}`, line } +} diff --git a/integration-tests/helpers/fake-agent.js b/integration-tests/helpers/fake-agent.js index 70aff2ecfa8..f1054720d92 100644 --- a/integration-tests/helpers/fake-agent.js +++ b/integration-tests/helpers/fake-agent.js @@ -188,6 +188,46 @@ module.exports = class FakeAgent extends EventEmitter { return resultPromise } + + assertLlmObsPayloadReceived (fn, timeout, expectedMessageCount = 1, resolveAtFirstSuccess) { + timeout = timeout || 30000 + let resultResolve + let resultReject + let msgCount = 0 + const errors = [] + + const timeoutObj = setTimeout(() => { + const errorsMsg = errors.length === 0 ? '' : `, additionally:\n${errors.map(e => e.stack).join('\n')}\n===\n` + resultReject(new Error(`timeout${errorsMsg}`, { cause: { errors } })) + }, timeout) + + const resultPromise = new Promise((resolve, reject) => { + resultResolve = () => { + clearTimeout(timeoutObj) + resolve() + } + resultReject = (e) => { + clearTimeout(timeoutObj) + reject(e) + } + }) + + const messageHandler = msg => { + try { + msgCount += 1 + fn(msg) + if (resolveAtFirstSuccess || msgCount === expectedMessageCount) { + resultResolve() + this.removeListener('llmobs', messageHandler) + } + } catch (e) { + errors.push(e) + } + } + this.on('llmobs', messageHandler) + + return resultPromise + } } function buildExpressServer (agent) { @@ -315,6 +355,14 @@ function buildExpressServer (agent) { }) }) + app.post('/evp_proxy/v2/api/v2/llmobs', (req, res) => { + res.status(200).send() + agent.emit('llmobs', { + headers: req.headers, + payload: req.body + }) + }) + return app } diff --git a/integration-tests/mocha/mocha.spec.js b/integration-tests/mocha/mocha.spec.js index 
dac0a9e3bff..3fa11871204 100644 --- a/integration-tests/mocha/mocha.spec.js +++ b/integration-tests/mocha/mocha.spec.js @@ -1875,7 +1875,7 @@ describe('mocha CommonJS', function () { }) }) - context('flaky test retries', () => { + context('auto test retries', () => { it('retries failed tests automatically', (done) => { receiver.setSettings({ itr_enabled: false, @@ -1911,6 +1911,10 @@ describe('mocha CommonJS', function () { const failedAttempts = tests.filter(test => test.meta[TEST_STATUS] === 'fail') assert.equal(failedAttempts.length, 2) + failedAttempts.forEach((failedTest, index) => { + assert.include(failedTest.meta[ERROR_MESSAGE], `expected ${index + 1} to equal 3`) + }) + // The first attempt is not marked as a retry const retriedFailure = failedAttempts.filter(test => test.meta[TEST_IS_RETRY] === 'true') assert.equal(retriedFailure.length, 1) diff --git a/integration-tests/profiler/profiler.spec.js b/integration-tests/profiler/profiler.spec.js index 7306d7051ad..f4760a0a167 100644 --- a/integration-tests/profiler/profiler.spec.js +++ b/integration-tests/profiler/profiler.spec.js @@ -13,7 +13,6 @@ const fsync = require('fs') const net = require('net') const zlib = require('zlib') const { Profile } = require('pprof-format') -const semver = require('semver') const DEFAULT_PROFILE_TYPES = ['wall', 'space'] if (process.platform !== 'win32') { @@ -315,61 +314,59 @@ describe('profiler', () => { assert.equal(endpoints.size, 3, encoded) }) - if (semver.gte(process.version, '16.0.0')) { - it('dns timeline events work', async () => { - const dnsEvents = await gatherNetworkTimelineEvents(cwd, 'profiler/dnstest.js', 'dns') - assert.sameDeepMembers(dnsEvents, [ - { name: 'lookup', host: 'example.org' }, - { name: 'lookup', host: 'example.com' }, - { name: 'lookup', host: 'datadoghq.com' }, - { name: 'queryA', host: 'datadoghq.com' }, - { name: 'lookupService', address: '13.224.103.60', port: 80 } - ]) - }) + it('dns timeline events work', async () => { + const dnsEvents 
= await gatherNetworkTimelineEvents(cwd, 'profiler/dnstest.js', 'dns') + assert.sameDeepMembers(dnsEvents, [ + { name: 'lookup', host: 'example.org' }, + { name: 'lookup', host: 'example.com' }, + { name: 'lookup', host: 'datadoghq.com' }, + { name: 'queryA', host: 'datadoghq.com' }, + { name: 'lookupService', address: '13.224.103.60', port: 80 } + ]) + }) - it('net timeline events work', async () => { - // Simple server that writes a constant message to the socket. - const msg = 'cya later!\n' - function createServer () { - const server = net.createServer((socket) => { - socket.end(msg, 'utf8') - }).on('error', (err) => { - throw err - }) - return server - } - // Create two instances of the server - const server1 = createServer() + it('net timeline events work', async () => { + // Simple server that writes a constant message to the socket. + const msg = 'cya later!\n' + function createServer () { + const server = net.createServer((socket) => { + socket.end(msg, 'utf8') + }).on('error', (err) => { + throw err + }) + return server + } + // Create two instances of the server + const server1 = createServer() + try { + const server2 = createServer() try { - const server2 = createServer() - try { - // Have the servers listen on ephemeral ports - const p = new Promise(resolve => { - server1.listen(0, () => { - server2.listen(0, async () => { - resolve([server1.address().port, server2.address().port]) - }) + // Have the servers listen on ephemeral ports + const p = new Promise(resolve => { + server1.listen(0, () => { + server2.listen(0, async () => { + resolve([server1.address().port, server2.address().port]) }) }) - const [port1, port2] = await p - const args = [String(port1), String(port2), msg] - // Invoke the profiled program, passing it the ports of the servers and - // the expected message. 
- const events = await gatherNetworkTimelineEvents(cwd, 'profiler/nettest.js', 'net', args) - // The profiled program should have two TCP connection events to the two - // servers. - assert.sameDeepMembers(events, [ - { name: 'connect', host: '127.0.0.1', port: port1 }, - { name: 'connect', host: '127.0.0.1', port: port2 } - ]) - } finally { - server2.close() - } + }) + const [port1, port2] = await p + const args = [String(port1), String(port2), msg] + // Invoke the profiled program, passing it the ports of the servers and + // the expected message. + const events = await gatherNetworkTimelineEvents(cwd, 'profiler/nettest.js', 'net', args) + // The profiled program should have two TCP connection events to the two + // servers. + assert.sameDeepMembers(events, [ + { name: 'connect', host: '127.0.0.1', port: port1 }, + { name: 'connect', host: '127.0.0.1', port: port2 } + ]) } finally { - server1.close() + server2.close() } - }) - } + } finally { + server1.close() + } + }) } context('shutdown', () => { diff --git a/integration-tests/standalone-asm.spec.js b/integration-tests/standalone-asm.spec.js index d57a96f738e..4e57b25bad6 100644 --- a/integration-tests/standalone-asm.spec.js +++ b/integration-tests/standalone-asm.spec.js @@ -10,6 +10,7 @@ const { curlAndAssertMessage, curl } = require('./helpers') +const { USER_KEEP, AUTO_REJECT, AUTO_KEEP } = require('../ext/priority') describe('Standalone ASM', () => { let sandbox, cwd, startupTestFile, agent, proc, env @@ -43,22 +44,18 @@ describe('Standalone ASM', () => { await agent.stop() }) - function assertKeep (payload, manual = true) { + function assertKeep (payload) { const { meta, metrics } = payload - if (manual) { - assert.propertyVal(meta, 'manual.keep', 'true') - } else { - assert.notProperty(meta, 'manual.keep') - } + assert.propertyVal(meta, '_dd.p.appsec', '1') - assert.propertyVal(metrics, '_sampling_priority_v1', 2) + assert.propertyVal(metrics, '_sampling_priority_v1', USER_KEEP) 
assert.propertyVal(metrics, '_dd.apm.enabled', 0) } function assertDrop (payload) { const { metrics } = payload - assert.propertyVal(metrics, '_sampling_priority_v1', 0) + assert.propertyVal(metrics, '_sampling_priority_v1', AUTO_REJECT) assert.propertyVal(metrics, '_dd.apm.enabled', 0) assert.notProperty(metrics, '_dd.p.appsec') } @@ -103,7 +100,7 @@ describe('Standalone ASM', () => { assert.notProperty(meta, 'manual.keep') assert.notProperty(meta, '_dd.p.appsec') - assert.propertyVal(metrics, '_sampling_priority_v1', 1) + assert.propertyVal(metrics, '_sampling_priority_v1', AUTO_KEEP) assert.propertyVal(metrics, '_dd.apm.enabled', 0) assertDrop(payload[2][0]) @@ -213,7 +210,7 @@ describe('Standalone ASM', () => { const innerReq = payload.find(p => p[0].resource === 'GET /down') assert.notStrictEqual(innerReq, undefined) - assertKeep(innerReq[0], false) + assertKeep(innerReq[0]) }, undefined, undefined, true) }) diff --git a/package.json b/package.json index 481d0d7bb14..4ab799f31a0 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "main": "index.js", "typings": "index.d.ts", "scripts": { + "env": "bash ./plugin-env", "preinstall": "node scripts/preinstall.js", "bench": "node benchmark", "bench:profiler": "node benchmark/profiler", @@ -30,6 +31,10 @@ "test:core:ci": "npm run test:core -- --coverage --nyc-arg=--include=\"packages/datadog-core/src/**/*.js\"", "test:lambda": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/lambda/**/*.spec.js\"", "test:lambda:ci": "nyc --no-clean --include \"packages/dd-trace/src/lambda/**/*.js\" -- npm run test:lambda", + "test:llmobs:sdk": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" --exclude \"packages/dd-trace/test/llmobs/plugins/**/*.spec.js\" \"packages/dd-trace/test/llmobs/**/*.spec.js\" ", + "test:llmobs:sdk:ci": "nyc --no-clean --include \"packages/dd-trace/src/llmobs/**/*.js\" -- npm run test:llmobs:sdk", + "test:llmobs:plugins": "mocha -r 
\"packages/dd-trace/test/setup/mocha.js\" \"packages/dd-trace/test/llmobs/plugins/**/*.spec.js\"", + "test:llmobs:plugins:ci": "yarn services && nyc --no-clean --include \"packages/dd-trace/src/llmobs/**/*.js\" -- npm run test:llmobs:plugins", "test:plugins": "mocha -r \"packages/dd-trace/test/setup/mocha.js\" \"packages/datadog-instrumentations/test/@($(echo $PLUGINS)).spec.js\" \"packages/datadog-plugin-@($(echo $PLUGINS))/test/**/*.spec.js\"", "test:plugins:ci": "yarn services && nyc --no-clean --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS)).js\" --include \"packages/datadog-instrumentations/src/@($(echo $PLUGINS))/**/*.js\" --include \"packages/datadog-plugin-@($(echo $PLUGINS))/src/**/*.js\" -- npm run test:plugins", "test:plugins:upstream": "node ./packages/dd-trace/test/plugins/suite.js", @@ -78,9 +83,9 @@ "dependencies": { "@datadog/native-appsec": "8.2.1", "@datadog/native-iast-rewriter": "2.5.0", - "@datadog/native-iast-taint-tracking": "3.1.0", - "@datadog/native-metrics": "^2.0.0", - "@datadog/pprof": "5.3.0", + "@datadog/native-iast-taint-tracking": "3.2.0", + "@datadog/native-metrics": "^3.0.1", + "@datadog/pprof": "5.4.1", "@datadog/sketches-js": "^2.1.0", "@opentelemetry/api": ">=1.0.0 <1.9.0", "@opentelemetry/core": "^1.14.0", diff --git a/packages/datadog-code-origin/index.js b/packages/datadog-code-origin/index.js index 530dd3cc8ae..278aac265ab 100644 --- a/packages/datadog-code-origin/index.js +++ b/packages/datadog-code-origin/index.js @@ -5,15 +5,15 @@ const { getUserLandFrames } = require('../dd-trace/src/plugins/util/stacktrace') const limit = Number(process.env._DD_CODE_ORIGIN_MAX_USER_FRAMES) || 8 module.exports = { - entryTag, - exitTag + entryTags, + exitTags } -function entryTag (topOfStackFunc) { +function entryTags (topOfStackFunc) { return tag('entry', topOfStackFunc) } -function exitTag (topOfStackFunc) { +function exitTags (topOfStackFunc) { return tag('exit', topOfStackFunc) } diff --git 
a/packages/datadog-core/src/utils/src/parse-tags.js b/packages/datadog-core/src/utils/src/parse-tags.js new file mode 100644 index 00000000000..4142e770e4e --- /dev/null +++ b/packages/datadog-core/src/utils/src/parse-tags.js @@ -0,0 +1,33 @@ +'use strict' + +const digitRegex = /^\d+$/ + +/** + * Converts a flat object of tags into a nested object. For example: + * { 'a.b.c': 'value' } -> { a: { b: { c: 'value' } } } + * Also supports array-keys. For example: + * { 'a.0.b': 'value' } -> { a: [{ b: 'value' }] } + * + * @param {Object} tags - Key/value pairs of tags + * @returns Object - Parsed tags + */ +module.exports = tags => { + const parsedTags = {} + for (const [tag, value] of Object.entries(tags)) { + const keys = tag.split('.') + let current = parsedTags + let depth = 0 + for (const key of keys) { + if (!current[key]) { + if (depth === keys.length - 1) { + current[key] = value + break + } + current[key] = keys[depth + 1]?.match(digitRegex) ? [] : {} + } + current = current[key] + depth++ + } + } + return parsedTags +} diff --git a/packages/datadog-core/test/utils/src/parse-tags.spec.js b/packages/datadog-core/test/utils/src/parse-tags.spec.js new file mode 100644 index 00000000000..ded1bb5974f --- /dev/null +++ b/packages/datadog-core/test/utils/src/parse-tags.spec.js @@ -0,0 +1,23 @@ +'use strict' + +require('../../../../dd-trace/test/setup/tap') + +const parseTags = require('../../../src/utils/src/parse-tags') + +describe('parseTags', () => { + it('should parse tags to object', () => { + const obj = { + 'a.0.a': 'foo', + 'a.0.b': 'bar', + 'a.1.a': 'baz' + } + + expect(parseTags(obj)).to.deep.equal({ + a: [{ a: 'foo', b: 'bar' }, { a: 'baz' }] + }) + }) + + it('should work with empty object', () => { + expect(parseTags({})).to.deep.equal({}) + }) +}) diff --git a/packages/datadog-instrumentations/src/amqplib.js b/packages/datadog-instrumentations/src/amqplib.js index f0650459a47..73275a0cd8c 100644 --- a/packages/datadog-instrumentations/src/amqplib.js +++ 
b/packages/datadog-instrumentations/src/amqplib.js @@ -25,6 +25,70 @@ addHook({ name: 'amqplib', file: 'lib/defs.js', versions: [MIN_VERSION] }, defs return defs }) +addHook({ name: 'amqplib', file: 'lib/channel_model.js', versions: [MIN_VERSION] }, x => { + shimmer.wrap(x.Channel.prototype, 'get', getMessage => function (queue, options) { + return getMessage.apply(this, arguments).then(message => { + if (message === null) { + return message + } + startCh.publish({ method: 'basic.get', message, fields: message.fields, queue }) + // finish right away + finishCh.publish() + return message + }) + }) + shimmer.wrap(x.Channel.prototype, 'consume', consume => function (queue, callback, options) { + if (!startCh.hasSubscribers) { + return consume.apply(this, arguments) + } + arguments[1] = (message, ...args) => { + if (message === null) { + return callback(message, ...args) + } + startCh.publish({ method: 'basic.deliver', message, fields: message.fields, queue }) + const result = callback(message, ...args) + finishCh.publish() + return result + } + return consume.apply(this, arguments) + }) + return x +}) + +addHook({ name: 'amqplib', file: 'lib/callback_model.js', versions: [MIN_VERSION] }, channel => { + shimmer.wrap(channel.Channel.prototype, 'get', getMessage => function (queue, options, callback) { + if (!startCh.hasSubscribers) { + return getMessage.apply(this, arguments) + } + arguments[2] = (error, message, ...args) => { + if (error !== null || message === null) { + return callback(error, message, ...args) + } + startCh.publish({ method: 'basic.get', message, fields: message.fields, queue }) + const result = callback(error, message, ...args) + finishCh.publish() + return result + } + return getMessage.apply(this, arguments) + }) + shimmer.wrap(channel.Channel.prototype, 'consume', consume => function (queue, callback) { + if (!startCh.hasSubscribers) { + return consume.apply(this, arguments) + } + arguments[1] = (message, ...args) => { + if (message === null) { + 
return callback(message, ...args) + } + startCh.publish({ method: 'basic.deliver', message, fields: message.fields, queue }) + const result = callback(message, ...args) + finishCh.publish() + return result + } + return consume.apply(this, arguments) + }) + return channel +}) + addHook({ name: 'amqplib', file: 'lib/channel.js', versions: [MIN_VERSION] }, channel => { shimmer.wrap(channel.Channel.prototype, 'sendImmediately', sendImmediately => function (method, fields) { return instrument(sendImmediately, this, arguments, methods[method], fields) @@ -33,15 +97,11 @@ addHook({ name: 'amqplib', file: 'lib/channel.js', versions: [MIN_VERSION] }, ch shimmer.wrap(channel.Channel.prototype, 'sendMessage', sendMessage => function (fields) { return instrument(sendMessage, this, arguments, 'basic.publish', fields, arguments[2]) }) - - shimmer.wrap(channel.BaseChannel.prototype, 'dispatchMessage', dispatchMessage => function (fields, message) { - return instrument(dispatchMessage, this, arguments, 'basic.deliver', fields, message) - }) return channel }) function instrument (send, channel, args, method, fields, message) { - if (!startCh.hasSubscribers) { + if (!startCh.hasSubscribers || method === 'basic.get') { return send.apply(channel, args) } diff --git a/packages/datadog-instrumentations/src/child_process.js b/packages/datadog-instrumentations/src/child_process.js index 8af49788007..f7224953367 100644 --- a/packages/datadog-instrumentations/src/child_process.js +++ b/packages/datadog-instrumentations/src/child_process.js @@ -13,19 +13,38 @@ const childProcessChannel = dc.tracingChannel('datadog:child_process:execution') // ignored exec method because it calls to execFile directly const execAsyncMethods = ['execFile', 'spawn'] -const execSyncMethods = ['execFileSync', 'spawnSync'] const names = ['child_process', 'node:child_process'] // child_process and node:child_process returns the same object instance, we only want to add hooks once let patched = false + +function 
throwSyncError (error) { + throw error +} + +function returnSpawnSyncError (error, context) { + context.result = { + error, + status: null, + signal: null, + output: null, + stdout: null, + stderr: null, + pid: 0 + } + + return context.result +} + names.forEach(name => { addHook({ name }, childProcess => { if (!patched) { patched = true - shimmer.massWrap(childProcess, execAsyncMethods, wrapChildProcessAsyncMethod()) - shimmer.massWrap(childProcess, execSyncMethods, wrapChildProcessSyncMethod()) - shimmer.wrap(childProcess, 'execSync', wrapChildProcessSyncMethod(true)) + shimmer.massWrap(childProcess, execAsyncMethods, wrapChildProcessAsyncMethod(childProcess.ChildProcess)) + shimmer.wrap(childProcess, 'execSync', wrapChildProcessSyncMethod(throwSyncError, true)) + shimmer.wrap(childProcess, 'execFileSync', wrapChildProcessSyncMethod(throwSyncError)) + shimmer.wrap(childProcess, 'spawnSync', wrapChildProcessSyncMethod(returnSpawnSyncError)) } return childProcess @@ -34,17 +53,21 @@ names.forEach(name => { function normalizeArgs (args, shell) { const childProcessInfo = { - command: args[0] + command: args[0], + file: args[0] } if (Array.isArray(args[1])) { childProcessInfo.command = childProcessInfo.command + ' ' + args[1].join(' ') + childProcessInfo.fileArgs = args[1] + if (args[2] !== null && typeof args[2] === 'object') { childProcessInfo.options = args[2] } } else if (args[1] !== null && typeof args[1] === 'object') { childProcessInfo.options = args[1] } + childProcessInfo.shell = shell || childProcessInfo.options?.shell === true || typeof childProcessInfo.options?.shell === 'string' @@ -52,7 +75,21 @@ function normalizeArgs (args, shell) { return childProcessInfo } -function wrapChildProcessSyncMethod (shell = false) { +function createContextFromChildProcessInfo (childProcessInfo) { + const context = { + command: childProcessInfo.command, + file: childProcessInfo.file, + shell: childProcessInfo.shell + } + + if (childProcessInfo.fileArgs) { + context.fileArgs 
= childProcessInfo.fileArgs + } + + return context +} + +function wrapChildProcessSyncMethod (returnError, shell = false) { return function wrapMethod (childProcessMethod) { return function () { if (!childProcessChannel.start.hasSubscribers || arguments.length === 0) { @@ -63,14 +100,30 @@ function wrapChildProcessSyncMethod (shell = false) { const innerResource = new AsyncResource('bound-anonymous-fn') return innerResource.runInAsyncScope(() => { - return childProcessChannel.traceSync( - childProcessMethod, - { - command: childProcessInfo.command, - shell: childProcessInfo.shell - }, - this, - ...arguments) + const context = createContextFromChildProcessInfo(childProcessInfo) + const abortController = new AbortController() + + childProcessChannel.start.publish({ ...context, abortController }) + + try { + if (abortController.signal.aborted) { + const error = abortController.signal.reason || new Error('Aborted') + // expected behaviors on error are different + return returnError(error, context) + } + + const result = childProcessMethod.apply(this, arguments) + context.result = result + + return result + } catch (err) { + context.error = err + childProcessChannel.error.publish(context) + + throw err + } finally { + childProcessChannel.end.publish(context) + } }) } } @@ -84,18 +137,52 @@ function wrapChildProcessCustomPromisifyMethod (customPromisifyMethod, shell) { const childProcessInfo = normalizeArgs(arguments, shell) - return childProcessChannel.tracePromise( - customPromisifyMethod, - { - command: childProcessInfo.command, - shell: childProcessInfo.shell - }, - this, - ...arguments) + const context = createContextFromChildProcessInfo(childProcessInfo) + + const { start, end, asyncStart, asyncEnd, error } = childProcessChannel + const abortController = new AbortController() + + start.publish({ + ...context, + abortController + }) + + let result + if (abortController.signal.aborted) { + result = Promise.reject(abortController.signal.reason || new Error('Aborted')) 
+ } else { + try { + result = customPromisifyMethod.apply(this, arguments) + } catch (error) { + error.publish({ ...context, error }) + throw error + } finally { + end.publish(context) + } + } + + function reject (err) { + context.error = err + error.publish(context) + asyncStart.publish(context) + + asyncEnd.publish(context) + return Promise.reject(err) + } + + function resolve (result) { + context.result = result + asyncStart.publish(context) + + asyncEnd.publish(context) + return result + } + + return Promise.prototype.then.call(result, resolve, reject) } } -function wrapChildProcessAsyncMethod (shell = false) { +function wrapChildProcessAsyncMethod (ChildProcess, shell = false) { return function wrapMethod (childProcessMethod) { function wrappedChildProcessMethod () { if (!childProcessChannel.start.hasSubscribers || arguments.length === 0) { @@ -112,9 +199,31 @@ function wrapChildProcessAsyncMethod (shell = false) { const innerResource = new AsyncResource('bound-anonymous-fn') return innerResource.runInAsyncScope(() => { - childProcessChannel.start.publish({ command: childProcessInfo.command, shell: childProcessInfo.shell }) + const context = createContextFromChildProcessInfo(childProcessInfo) + const abortController = new AbortController() + + childProcessChannel.start.publish({ ...context, abortController }) + + let childProcess + if (abortController.signal.aborted) { + childProcess = new ChildProcess() + childProcess.on('error', () => {}) // Original method does not crash when non subscribers + + process.nextTick(() => { + const error = abortController.signal.reason || new Error('Aborted') + childProcess.emit('error', error) + + const cb = arguments[arguments.length - 1] + if (typeof cb === 'function') { + cb(error) + } + + childProcess.emit('close') + }) + } else { + childProcess = childProcessMethod.apply(this, arguments) + } - const childProcess = childProcessMethod.apply(this, arguments) if (childProcess) { let errorExecuted = false @@ -129,8 +238,7 @@ 
function wrapChildProcessAsyncMethod (shell = false) { childProcessChannel.error.publish() } childProcessChannel.asyncEnd.publish({ - command: childProcessInfo.command, - shell: childProcessInfo.shell, + ...context, result: code }) }) diff --git a/packages/datadog-instrumentations/src/helpers/hooks.js b/packages/datadog-instrumentations/src/helpers/hooks.js index a20c54c7155..179a11f3a72 100644 --- a/packages/datadog-instrumentations/src/helpers/hooks.js +++ b/packages/datadog-instrumentations/src/helpers/hooks.js @@ -80,6 +80,7 @@ module.exports = { 'mongodb-core': () => require('../mongodb-core'), mongoose: () => require('../mongoose'), mquery: () => require('../mquery'), + multer: () => require('../multer'), mysql: () => require('../mysql'), mysql2: () => require('../mysql2'), net: () => require('../net'), @@ -91,6 +92,7 @@ module.exports = { 'node:http2': () => require('../http2'), 'node:https': () => require('../http'), 'node:net': () => require('../net'), + 'node:url': () => require('../url'), nyc: () => require('../nyc'), oracledb: () => require('../oracledb'), openai: () => require('../openai'), @@ -115,6 +117,7 @@ module.exports = { sharedb: () => require('../sharedb'), tedious: () => require('../tedious'), undici: () => require('../undici'), + url: () => require('../url'), vitest: { esmFirst: true, fn: () => require('../vitest') }, when: () => require('../when'), winston: () => require('../winston'), diff --git a/packages/datadog-instrumentations/src/kafkajs.js b/packages/datadog-instrumentations/src/kafkajs.js index 395c69de057..e75c03e7e64 100644 --- a/packages/datadog-instrumentations/src/kafkajs.js +++ b/packages/datadog-instrumentations/src/kafkajs.js @@ -52,45 +52,59 @@ addHook({ name: 'kafkajs', file: 'src/index.js', versions: ['>=1.4'] }, (BaseKaf const send = producer.send const bootstrapServers = this._brokers - producer.send = function () { - const innerAsyncResource = new AsyncResource('bound-anonymous-fn') + const kafkaClusterIdPromise = 
getKafkaClusterId(this) - return innerAsyncResource.runInAsyncScope(() => { - if (!producerStartCh.hasSubscribers) { - return send.apply(this, arguments) - } + producer.send = function () { + const wrappedSend = (clusterId) => { + const innerAsyncResource = new AsyncResource('bound-anonymous-fn') - try { - const { topic, messages = [] } = arguments[0] - for (const message of messages) { - if (message !== null && typeof message === 'object') { - message.headers = message.headers || {} - } + return innerAsyncResource.runInAsyncScope(() => { + if (!producerStartCh.hasSubscribers) { + return send.apply(this, arguments) } - producerStartCh.publish({ topic, messages, bootstrapServers }) - - const result = send.apply(this, arguments) - - result.then( - innerAsyncResource.bind(res => { - producerFinishCh.publish(undefined) - producerCommitCh.publish(res) - }), - innerAsyncResource.bind(err => { - if (err) { - producerErrorCh.publish(err) + + try { + const { topic, messages = [] } = arguments[0] + for (const message of messages) { + if (message !== null && typeof message === 'object') { + message.headers = message.headers || {} } - producerFinishCh.publish(undefined) - }) - ) + } + producerStartCh.publish({ topic, messages, bootstrapServers, clusterId }) - return result - } catch (e) { - producerErrorCh.publish(e) - producerFinishCh.publish(undefined) - throw e - } - }) + const result = send.apply(this, arguments) + + result.then( + innerAsyncResource.bind(res => { + producerFinishCh.publish(undefined) + producerCommitCh.publish(res) + }), + innerAsyncResource.bind(err => { + if (err) { + producerErrorCh.publish(err) + } + producerFinishCh.publish(undefined) + }) + ) + + return result + } catch (e) { + producerErrorCh.publish(e) + producerFinishCh.publish(undefined) + throw e + } + }) + } + + if (!isPromise(kafkaClusterIdPromise)) { + // promise is already resolved + return wrappedSend(kafkaClusterIdPromise) + } else { + // promise is not resolved + return 
kafkaClusterIdPromise.then((clusterId) => { + return wrappedSend(clusterId) + }) + } } return producer }) @@ -100,15 +114,17 @@ addHook({ name: 'kafkajs', file: 'src/index.js', versions: ['>=1.4'] }, (BaseKaf return createConsumer.apply(this, arguments) } - const eachMessageExtractor = (args) => { + const kafkaClusterIdPromise = getKafkaClusterId(this) + + const eachMessageExtractor = (args, clusterId) => { const { topic, partition, message } = args[0] - return { topic, partition, message, groupId } + return { topic, partition, message, groupId, clusterId } } - const eachBatchExtractor = (args) => { + const eachBatchExtractor = (args, clusterId) => { const { batch } = args[0] const { topic, partition, messages } = batch - return { topic, partition, messages, groupId } + return { topic, partition, messages, groupId, clusterId } } const consumer = createConsumer.apply(this, arguments) @@ -116,43 +132,53 @@ addHook({ name: 'kafkajs', file: 'src/index.js', versions: ['>=1.4'] }, (BaseKaf consumer.on(consumer.events.COMMIT_OFFSETS, commitsFromEvent) const run = consumer.run - const groupId = arguments[0].groupId + consumer.run = function ({ eachMessage, eachBatch, ...runArgs }) { - eachMessage = wrapFunction( - eachMessage, - consumerStartCh, - consumerFinishCh, - consumerErrorCh, - eachMessageExtractor - ) - - eachBatch = wrapFunction( - eachBatch, - batchConsumerStartCh, - batchConsumerFinishCh, - batchConsumerErrorCh, - eachBatchExtractor - ) - - return run({ - eachMessage, - eachBatch, - ...runArgs - }) + const wrapConsume = (clusterId) => { + return run({ + eachMessage: wrappedCallback( + eachMessage, + consumerStartCh, + consumerFinishCh, + consumerErrorCh, + eachMessageExtractor, + clusterId + ), + eachBatch: wrappedCallback( + eachBatch, + batchConsumerStartCh, + batchConsumerFinishCh, + batchConsumerErrorCh, + eachBatchExtractor, + clusterId + ), + ...runArgs + }) + } + + if (!isPromise(kafkaClusterIdPromise)) { + // promise is already resolved + return 
wrapConsume(kafkaClusterIdPromise) + } else { + // promise is not resolved + return kafkaClusterIdPromise.then((clusterId) => { + return wrapConsume(clusterId) + }) + } } - return consumer }) return Kafka }) -const wrapFunction = (fn, startCh, finishCh, errorCh, extractArgs) => { +const wrappedCallback = (fn, startCh, finishCh, errorCh, extractArgs, clusterId) => { return typeof fn === 'function' ? function (...args) { const innerAsyncResource = new AsyncResource('bound-anonymous-fn') return innerAsyncResource.runInAsyncScope(() => { - const extractedArgs = extractArgs(args) + const extractedArgs = extractArgs(args, clusterId) + startCh.publish(extractedArgs) try { const result = fn.apply(this, args) @@ -179,3 +205,37 @@ const wrapFunction = (fn, startCh, finishCh, errorCh, extractArgs) => { } : fn } + +const getKafkaClusterId = (kafka) => { + if (kafka._ddKafkaClusterId) { + return kafka._ddKafkaClusterId + } + + if (!kafka.admin) { + return null + } + + const admin = kafka.admin() + + if (!admin.describeCluster) { + return null + } + + return admin.connect() + .then(() => { + return admin.describeCluster() + }) + .then((clusterInfo) => { + const clusterId = clusterInfo?.clusterId + kafka._ddKafkaClusterId = clusterId + admin.disconnect() + return clusterId + }) + .catch((error) => { + throw error + }) +} + +function isPromise (obj) { + return !!obj && (typeof obj === 'object' || typeof obj === 'function') && typeof obj.then === 'function' +} diff --git a/packages/datadog-instrumentations/src/mocha/utils.js b/packages/datadog-instrumentations/src/mocha/utils.js index a4da0762039..2b51fd6e73b 100644 --- a/packages/datadog-instrumentations/src/mocha/utils.js +++ b/packages/datadog-instrumentations/src/mocha/utils.js @@ -280,12 +280,12 @@ function getOnFailHandler (isMain) { } function getOnTestRetryHandler () { - return function (test) { + return function (test, err) { const asyncResource = getTestAsyncResource(test) if (asyncResource) { const isFirstAttempt = 
test._currentRetry === 0 asyncResource.runInAsyncScope(() => { - testRetryCh.publish(isFirstAttempt) + testRetryCh.publish({ isFirstAttempt, err }) }) } const key = getTestToArKey(test) diff --git a/packages/datadog-instrumentations/src/multer.js b/packages/datadog-instrumentations/src/multer.js new file mode 100644 index 00000000000..90fae3a8297 --- /dev/null +++ b/packages/datadog-instrumentations/src/multer.js @@ -0,0 +1,37 @@ +'use strict' + +const shimmer = require('../../datadog-shimmer') +const { channel, addHook, AsyncResource } = require('./helpers/instrument') + +const multerReadCh = channel('datadog:multer:read:finish') + +function publishRequestBodyAndNext (req, res, next) { + return shimmer.wrapFunction(next, next => function () { + if (multerReadCh.hasSubscribers && req) { + const abortController = new AbortController() + const body = req.body + + multerReadCh.publish({ req, res, body, abortController }) + + if (abortController.signal.aborted) return + } + + return next.apply(this, arguments) + }) +} + +addHook({ + name: 'multer', + file: 'lib/make-middleware.js', + versions: ['^1.4.4-lts.1'] +}, makeMiddleware => { + return shimmer.wrapFunction(makeMiddleware, makeMiddleware => function () { + const middleware = makeMiddleware.apply(this, arguments) + + return shimmer.wrapFunction(middleware, middleware => function wrapMulterMiddleware (req, res, next) { + const nextResource = new AsyncResource('bound-anonymous-fn') + arguments[2] = nextResource.bind(publishRequestBodyAndNext(req, res, next)) + return middleware.apply(this, arguments) + }) + }) +}) diff --git a/packages/datadog-instrumentations/src/openai.js b/packages/datadog-instrumentations/src/openai.js index 940b5919d24..3528b1ecc13 100644 --- a/packages/datadog-instrumentations/src/openai.js +++ b/packages/datadog-instrumentations/src/openai.js @@ -3,8 +3,8 @@ const { addHook } = require('./helpers/instrument') const shimmer = require('../../datadog-shimmer') -const tracingChannel = 
require('dc-polyfill').tracingChannel -const ch = tracingChannel('apm:openai:request') +const dc = require('dc-polyfill') +const ch = dc.tracingChannel('apm:openai:request') const V4_PACKAGE_SHIMS = [ { diff --git a/packages/datadog-instrumentations/src/router.js b/packages/datadog-instrumentations/src/router.js index cdd08f9f539..00fbb6cec1a 100644 --- a/packages/datadog-instrumentations/src/router.js +++ b/packages/datadog-instrumentations/src/router.js @@ -112,7 +112,6 @@ function createWrapRouterMethod (name) { path: pattern instanceof RegExp ? `(${pattern})` : pattern, test: layer => { const matchers = layerMatchers.get(layer) - return !isFastStar(layer, matchers) && !isFastSlash(layer, matchers) && cachedPathToRegExp(pattern).test(layer.path) @@ -121,7 +120,7 @@ function createWrapRouterMethod (name) { } function isFastStar (layer, matchers) { - if (layer.regexp.fast_star !== undefined) { + if (layer.regexp?.fast_star !== undefined) { return layer.regexp.fast_star } @@ -129,7 +128,7 @@ function createWrapRouterMethod (name) { } function isFastSlash (layer, matchers) { - if (layer.regexp.fast_slash !== undefined) { + if (layer.regexp?.fast_slash !== undefined) { return layer.regexp.fast_slash } diff --git a/packages/datadog-instrumentations/src/url.js b/packages/datadog-instrumentations/src/url.js new file mode 100644 index 00000000000..18edb0079e3 --- /dev/null +++ b/packages/datadog-instrumentations/src/url.js @@ -0,0 +1,84 @@ +'use strict' + +const { addHook, channel } = require('./helpers/instrument') +const shimmer = require('../../datadog-shimmer') +const names = ['url', 'node:url'] + +const parseFinishedChannel = channel('datadog:url:parse:finish') +const urlGetterChannel = channel('datadog:url:getter:finish') +const instrumentedGetters = ['host', 'origin', 'hostname'] + +addHook({ name: names }, function (url) { + shimmer.wrap(url, 'parse', (parse) => { + return function wrappedParse (input) { + const parsedValue = parse.apply(this, arguments) + if 
(!parseFinishedChannel.hasSubscribers) return parsedValue + + parseFinishedChannel.publish({ + input, + parsed: parsedValue, + isURL: false + }) + + return parsedValue + } + }) + + const URLPrototype = url.URL.prototype.constructor.prototype + instrumentedGetters.forEach(property => { + const originalDescriptor = Object.getOwnPropertyDescriptor(URLPrototype, property) + + if (originalDescriptor?.get) { + const newDescriptor = shimmer.wrap(originalDescriptor, 'get', function (originalGet) { + return function get () { + const result = originalGet.apply(this, arguments) + if (!urlGetterChannel.hasSubscribers) return result + + const context = { urlObject: this, result, property } + urlGetterChannel.publish(context) + + return context.result + } + }) + + Object.defineProperty(URLPrototype, property, newDescriptor) + } + }) + + shimmer.wrap(url, 'URL', (URL) => { + return class extends URL { + constructor (input, base) { + super(...arguments) + + if (!parseFinishedChannel.hasSubscribers) return + + parseFinishedChannel.publish({ + input, + base, + parsed: this, + isURL: true + }) + } + } + }) + + if (url.URL.parse) { + shimmer.wrap(url.URL, 'parse', (parse) => { + return function wrappedParse (input, base) { + const parsedValue = parse.apply(this, arguments) + if (!parseFinishedChannel.hasSubscribers) return parsedValue + + parseFinishedChannel.publish({ + input, + base, + parsed: parsedValue, + isURL: true + }) + + return parsedValue + } + }) + } + + return url +}) diff --git a/packages/datadog-instrumentations/test/child_process.spec.js b/packages/datadog-instrumentations/test/child_process.spec.js index ffd002e8a6b..f6d19423797 100644 --- a/packages/datadog-instrumentations/test/child_process.spec.js +++ b/packages/datadog-instrumentations/test/child_process.spec.js @@ -9,7 +9,7 @@ describe('child process', () => { const modules = ['child_process', 'node:child_process'] const execAsyncMethods = ['execFile', 'spawn'] const execAsyncShellMethods = ['exec'] - const 
execSyncMethods = ['execFileSync'] + const execSyncMethods = ['execFileSync', 'spawnSync'] const execSyncShellMethods = ['execSync'] const childProcessChannel = dc.tracingChannel('datadog:child_process:execution') @@ -51,7 +51,7 @@ describe('child process', () => { }) }) - describe('async methods', (done) => { + describe('async methods', () => { describe('command not interpreted by a shell by default', () => { execAsyncMethods.forEach(methodName => { describe(`method ${methodName}`, () => { @@ -59,20 +59,59 @@ describe('child process', () => { const childEmitter = childProcess[methodName]('ls') childEmitter.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'ls', shell: false }) - expect(asyncFinish).to.have.been.calledOnceWith({ command: 'ls', shell: false, result: 0 }) + expect(start).to.have.been.calledOnceWith({ + command: 'ls', + file: 'ls', + shell: false, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(asyncFinish).to.have.been.calledOnceWith({ + command: 'ls', + file: 'ls', + shell: false, + result: 0 + }) expect(error).not.to.have.been.called done() }) }) + it('should publish arguments', (done) => { + const childEmitter = childProcess[methodName]('ls', ['-la']) + + childEmitter.once('close', () => { + expect(start).to.have.been.calledOnceWith({ + command: 'ls -la', + file: 'ls', + fileArgs: ['-la'], + shell: false, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(asyncFinish).to.have.been.calledOnceWith({ + command: 'ls -la', + file: 'ls', + shell: false, + fileArgs: ['-la'], + result: 0 + }) + + done() + }) + }) + it('should execute error callback', (done) => { const childEmitter = childProcess[methodName]('invalid_command_test') childEmitter.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'invalid_command_test', shell: false }) + expect(start).to.have.been.calledOnceWith({ + command: 'invalid_command_test', + file: 'invalid_command_test', + shell: 
false, + abortController: sinon.match.instanceOf(AbortController) + }) expect(asyncFinish).to.have.been.calledOnceWith({ command: 'invalid_command_test', + file: 'invalid_command_test', shell: false, result: -2 }) @@ -85,13 +124,20 @@ describe('child process', () => { const childEmitter = childProcess[methodName]('node -e "process.exit(1)"', { shell: true }) childEmitter.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', shell: true }) + expect(start).to.have.been.calledOnceWith({ + command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', + abortController: sinon.match.instanceOf(AbortController), + shell: true + }) expect(asyncFinish).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', shell: true, result: 1 }) expect(error).to.have.been.calledOnce + done() }) }) @@ -101,13 +147,15 @@ describe('child process', () => { describe(`method ${methodName} with promisify`, () => { it('should execute success callbacks', async () => { await promisify(childProcess[methodName])('echo') + expect(start.firstCall.firstArg).to.include({ command: 'echo', + file: 'echo', shell: false }) - expect(asyncFinish).to.have.been.calledOnceWith({ command: 'echo', + file: 'echo', shell: false, result: { stdout: '\n', @@ -177,8 +225,13 @@ describe('child process', () => { const res = childProcess[methodName]('ls') res.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'ls', shell: true }) - expect(asyncFinish).to.have.been.calledOnceWith({ command: 'ls', shell: true, result: 0 }) + expect(start).to.have.been.calledOnceWith({ + command: 'ls', + file: 'ls', + shell: true, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(asyncFinish).to.have.been.calledOnceWith({ command: 'ls', file: 'ls', shell: true, result: 0 }) expect(error).not.to.have.been.called done() }) @@ -188,9 +241,15 @@ describe('child process', () => { const 
res = childProcess[methodName]('node -e "process.exit(1)"') res.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', shell: true }) + expect(start).to.have.been.calledOnceWith({ + command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', + abortController: sinon.match.instanceOf(AbortController), + shell: true + }) expect(asyncFinish).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', shell: true, result: 1 }) @@ -203,10 +262,16 @@ describe('child process', () => { const res = childProcess[methodName]('invalid_command_test') res.once('close', () => { - expect(start).to.have.been.calledOnceWith({ command: 'invalid_command_test', shell: true }) + expect(start).to.have.been.calledOnceWith({ + command: 'invalid_command_test', + file: 'invalid_command_test', + abortController: sinon.match.instanceOf(AbortController), + shell: true + }) expect(error).to.have.been.calledOnce expect(asyncFinish).to.have.been.calledOnceWith({ command: 'invalid_command_test', + file: 'invalid_command_test', shell: true, result: 127 }) @@ -220,10 +285,13 @@ describe('child process', () => { await promisify(childProcess[methodName])('echo') expect(start).to.have.been.calledOnceWith({ command: 'echo', + file: 'echo', + abortController: sinon.match.instanceOf(AbortController), shell: true }) expect(asyncFinish).to.have.been.calledOnceWith({ command: 'echo', + file: 'echo', shell: true, result: 0 }) @@ -235,7 +303,12 @@ describe('child process', () => { await promisify(childProcess[methodName])('invalid_command_test') return Promise.reject(new Error('Command expected to fail')) } catch (e) { - expect(start).to.have.been.calledOnceWith({ command: 'invalid_command_test', shell: true }) + expect(start).to.have.been.calledOnceWith({ + command: 'invalid_command_test', + file: 'invalid_command_test', + abortController: sinon.match.instanceOf(AbortController), + shell: true + }) 
expect(asyncFinish).to.have.been.calledOnce expect(error).to.have.been.calledOnce } @@ -246,9 +319,15 @@ describe('child process', () => { await promisify(childProcess[methodName])('node -e "process.exit(1)"') return Promise.reject(new Error('Command expected to fail')) } catch (e) { - expect(start).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', shell: true }) + expect(start).to.have.been.calledOnceWith({ + command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', + abortController: sinon.match.instanceOf(AbortController), + shell: true + }) expect(asyncFinish).to.have.been.calledOnceWith({ command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', shell: true, result: 1 }) @@ -258,6 +337,62 @@ describe('child process', () => { }) }) }) + + describe('aborting in abortController', () => { + const abortError = new Error('AbortError') + function abort ({ abortController }) { + abortController.abort(abortError) + + if (!abortController.signal.reason) { + abortController.signal.reason = abortError + } + } + + beforeEach(() => { + childProcessChannel.subscribe({ start: abort }) + }) + + afterEach(() => { + childProcessChannel.unsubscribe({ start: abort }) + }) + + ;[...execAsyncMethods, ...execAsyncShellMethods].forEach((methodName) => { + describe(`method ${methodName}`, () => { + it('should execute callback with the error', (done) => { + childProcess[methodName]('aborted_command', (error) => { + expect(error).to.be.equal(abortError) + + done() + }) + }) + + it('should emit error and close', (done) => { + const cp = childProcess[methodName]('aborted_command') + const errorCallback = sinon.stub() + + cp.on('error', errorCallback) + cp.on('close', () => { + expect(errorCallback).to.have.been.calledWithExactly(abortError) + done() + }) + }) + + it('should emit error and close and execute the callback', (done) => { + const callback = sinon.stub() + const errorCallback = sinon.stub() + const cp = 
childProcess[methodName]('aborted_command', callback) + + cp.on('error', errorCallback) + cp.on('close', () => { + expect(callback).to.have.been.calledWithExactly(abortError) + expect(errorCallback).to.have.been.calledWithExactly(abortError) + + done() + }) + }) + }) + }) + }) }) describe('sync methods', () => { @@ -269,13 +404,15 @@ describe('child process', () => { expect(start).to.have.been.calledOnceWith({ command: 'ls', + file: 'ls', shell: false, - result + abortController: sinon.match.instanceOf(AbortController) }, 'tracing:datadog:child_process:execution:start') expect(finish).to.have.been.calledOnceWith({ command: 'ls', + file: 'ls', shell: false, result }, @@ -284,56 +421,105 @@ describe('child process', () => { expect(error).not.to.have.been.called }) - it('should execute error callback', () => { - let childError - try { - childProcess[methodName]('invalid_command_test') - } catch (error) { - childError = error - } finally { - expect(start).to.have.been.calledOnceWith({ - command: 'invalid_command_test', - shell: false, - error: childError - }) - expect(finish).to.have.been.calledOnce - expect(error).to.have.been.calledOnce - } - }) + it('should publish arguments', () => { + const result = childProcess[methodName]('ls', ['-la']) - it('should execute error callback with `exit 1` command', () => { - let childError - try { - childProcess[methodName]('node -e "process.exit(1)"') - } catch (error) { - childError = error - } finally { - expect(start).to.have.been.calledOnceWith({ - command: 'node -e "process.exit(1)"', - shell: false, - error: childError - }) - expect(finish).to.have.been.calledOnce - } + expect(start).to.have.been.calledOnceWith({ + command: 'ls -la', + file: 'ls', + shell: false, + fileArgs: ['-la'], + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + command: 'ls -la', + file: 'ls', + shell: false, + fileArgs: ['-la'], + result + }) }) - if (methodName !== 'execFileSync' || 
NODE_MAJOR > 16) { - // when a process return an invalid code, in node <=16, in execFileSync with shell:true - // an exception is not thrown - it('should execute error callback with `exit 1` command with shell: true', () => { - let childError + + // errors are handled in a different way in spawnSync method + if (methodName !== 'spawnSync') { + it('should execute error callback', () => { + let childError, result try { - childProcess[methodName]('node -e "process.exit(1)"', { shell: true }) + result = childProcess[methodName]('invalid_command_test') } catch (error) { childError = error } finally { + childError = childError || result?.error + + const expectedContext = { + command: 'invalid_command_test', + file: 'invalid_command_test', + shell: false + } expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + ...expectedContext, + error: childError + }) + expect(error).to.have.been.calledOnceWith({ + ...expectedContext, + error: childError + }) + } + }) + + it('should execute error callback with `exit 1` command', () => { + let childError + try { + childProcess[methodName]('node -e "process.exit(1)"') + } catch (error) { + childError = error + } finally { + const expectedContext = { command: 'node -e "process.exit(1)"', - shell: true, + file: 'node -e "process.exit(1)"', + shell: false + } + expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + ...expectedContext, error: childError }) - expect(finish).to.have.been.calledOnce } }) + + if (methodName !== 'execFileSync' || NODE_MAJOR > 16) { + // when a process return an invalid code, in node <=16, in execFileSync with shell:true + // an exception is not thrown + it('should execute error callback with `exit 1` command with shell: true', () => { + let childError + try { + 
childProcess[methodName]('node -e "process.exit(1)"', { shell: true }) + } catch (error) { + childError = error + } finally { + const expectedContext = { + command: 'node -e "process.exit(1)"', + file: 'node -e "process.exit(1)"', + shell: true + } + expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + ...expectedContext, + error: childError + }) + } + }) + } } }) }) @@ -345,14 +531,17 @@ describe('child process', () => { it('should execute success callbacks', () => { const result = childProcess[methodName]('ls') - expect(start).to.have.been.calledOnceWith({ + const expectedContext = { command: 'ls', - shell: true, - result + file: 'ls', + shell: true + } + expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) }) expect(finish).to.have.been.calledOnceWith({ - command: 'ls', - shell: true, + ...expectedContext, result }) expect(error).not.to.have.been.called @@ -365,13 +554,23 @@ describe('child process', () => { } catch (error) { childError = error } finally { - expect(start).to.have.been.calledOnceWith({ + const expectedContext = { command: 'invalid_command_test', - shell: true, + file: 'invalid_command_test', + shell: true + } + expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + ...expectedContext, + error: childError + }) + expect(error).to.have.been.calledOnceWith({ + ...expectedContext, error: childError }) - expect(finish).to.have.been.calledOnce - expect(error).to.have.been.calledOnce } }) @@ -382,17 +581,71 @@ describe('child process', () => { } catch (error) { childError = error } finally { - expect(start).to.have.been.calledOnceWith({ + const expectedContext = { command: 'node -e "process.exit(1)"', - shell: true, + file: 'node -e 
"process.exit(1)"', + shell: true + } + expect(start).to.have.been.calledOnceWith({ + ...expectedContext, + abortController: sinon.match.instanceOf(AbortController) + }) + expect(finish).to.have.been.calledOnceWith({ + ...expectedContext, error: childError }) - expect(finish).to.have.been.calledOnce } }) }) }) }) + + describe('aborting in abortController', () => { + const abortError = new Error('AbortError') + function abort ({ abortController }) { + abortController.abort(abortError) + } + + beforeEach(() => { + childProcessChannel.subscribe({ start: abort }) + }) + + afterEach(() => { + childProcessChannel.unsubscribe({ start: abort }) + }) + + ;['execFileSync', 'execSync'].forEach((methodName) => { + describe(`method ${methodName}`, () => { + it('should throw the expected error', () => { + try { + childProcess[methodName]('aborted_command') + } catch (e) { + expect(e).to.be.equal(abortError) + + return + } + + throw new Error('Expected to fail') + }) + }) + }) + + describe('method spawnSync', () => { + it('should return error field', () => { + const result = childProcess.spawnSync('aborted_command') + + expect(result).to.be.deep.equal({ + error: abortError, + status: null, + signal: null, + output: null, + stdout: null, + stderr: null, + pid: 0 + }) + }) + }) + }) }) }) }) diff --git a/packages/datadog-instrumentations/test/multer.spec.js b/packages/datadog-instrumentations/test/multer.spec.js new file mode 100644 index 00000000000..f7edcee6cd3 --- /dev/null +++ b/packages/datadog-instrumentations/test/multer.spec.js @@ -0,0 +1,108 @@ +'use strict' + +const dc = require('dc-polyfill') +const axios = require('axios') +const agent = require('../../dd-trace/test/plugins/agent') +const { storage } = require('../../datadog-core') + +withVersions('multer', 'multer', version => { + describe('multer parser instrumentation', () => { + const multerReadCh = dc.channel('datadog:multer:read:finish') + let port, server, middlewareProcessBodyStub, formData + + before(() => { + 
return agent.load(['http', 'express', 'multer'], { client: false }) + }) + + before((done) => { + const express = require('../../../versions/express').get() + const multer = require(`../../../versions/multer@${version}`).get() + const uploadToMemory = multer({ storage: multer.memoryStorage(), limits: { fileSize: 200000 } }) + + const app = express() + + app.post('/', uploadToMemory.single('file'), (req, res) => { + middlewareProcessBodyStub(req.body.key) + res.end('DONE') + }) + server = app.listen(0, () => { + port = server.address().port + done() + }) + }) + + beforeEach(async () => { + middlewareProcessBodyStub = sinon.stub() + + formData = new FormData() + formData.append('key', 'value') + }) + + after(() => { + server.close() + return agent.close({ ritmReset: false }) + }) + + it('should not abort the request by default', async () => { + const res = await axios.post(`http://localhost:${port}/`, formData) + + expect(middlewareProcessBodyStub).to.be.calledOnceWithExactly(formData.get('key')) + expect(res.data).to.be.equal('DONE') + }) + + it('should not abort the request with non blocker subscription', async () => { + function noop () {} + multerReadCh.subscribe(noop) + + try { + const res = await axios.post(`http://localhost:${port}/`, formData) + + expect(middlewareProcessBodyStub).to.be.calledOnceWithExactly(formData.get('key')) + expect(res.data).to.be.equal('DONE') + } finally { + multerReadCh.unsubscribe(noop) + } + }) + + it('should abort the request when abortController.abort() is called', async () => { + function blockRequest ({ res, abortController }) { + res.end('BLOCKED') + abortController.abort() + } + multerReadCh.subscribe(blockRequest) + + try { + const res = await axios.post(`http://localhost:${port}/`, formData) + + expect(middlewareProcessBodyStub).not.to.be.called + expect(res.data).to.be.equal('BLOCKED') + } finally { + multerReadCh.unsubscribe(blockRequest) + } + }) + + it('should not lose the http async context', async () => { + let store 
+ let payload + + function handler (data) { + store = storage.getStore() + payload = data + } + multerReadCh.subscribe(handler) + + try { + const res = await axios.post(`http://localhost:${port}/`, formData) + + expect(store).to.have.property('req', payload.req) + expect(store).to.have.property('res', payload.res) + expect(store).to.have.property('span') + + expect(middlewareProcessBodyStub).to.be.calledOnceWithExactly(formData.get('key')) + expect(res.data).to.be.equal('DONE') + } finally { + multerReadCh.unsubscribe(handler) + } + }) + }) +}) diff --git a/packages/datadog-instrumentations/test/url.spec.js b/packages/datadog-instrumentations/test/url.spec.js new file mode 100644 index 00000000000..defb8f08193 --- /dev/null +++ b/packages/datadog-instrumentations/test/url.spec.js @@ -0,0 +1,114 @@ +'use strict' + +const agent = require('../../dd-trace/test/plugins/agent') +const { channel } = require('../src/helpers/instrument') +const names = ['url', 'node:url'] + +names.forEach(name => { + describe(name, () => { + const url = require(name) + const parseFinishedChannel = channel('datadog:url:parse:finish') + const urlGetterChannel = channel('datadog:url:getter:finish') + let parseFinishedChannelCb, urlGetterChannelCb + + before(async () => { + await agent.load('url') + }) + + after(() => { + return agent.close() + }) + + beforeEach(() => { + parseFinishedChannelCb = sinon.stub() + urlGetterChannelCb = sinon.stub() + parseFinishedChannel.subscribe(parseFinishedChannelCb) + urlGetterChannel.subscribe(urlGetterChannelCb) + }) + + afterEach(() => { + parseFinishedChannel.unsubscribe(parseFinishedChannelCb) + urlGetterChannel.unsubscribe(urlGetterChannelCb) + }) + + describe('url.parse', () => { + it('should publish', () => { + // eslint-disable-next-line n/no-deprecated-api + const result = url.parse('https://www.datadoghq.com') + + sinon.assert.calledOnceWithExactly(parseFinishedChannelCb, { + input: 'https://www.datadoghq.com', + parsed: result, + isURL: false + }, 
sinon.match.any) + }) + }) + + describe('url.URL', () => { + describe('new URL', () => { + it('should publish with input', () => { + const result = new url.URL('https://www.datadoghq.com') + + sinon.assert.calledOnceWithExactly(parseFinishedChannelCb, { + input: 'https://www.datadoghq.com', + base: undefined, + parsed: result, + isURL: true + }, sinon.match.any) + }) + + it('should publish with base and input', () => { + const result = new url.URL('/path', 'https://www.datadoghq.com') + + sinon.assert.calledOnceWithExactly(parseFinishedChannelCb, { + base: 'https://www.datadoghq.com', + input: '/path', + parsed: result, + isURL: true + }, sinon.match.any) + }) + + ;['host', 'origin', 'hostname'].forEach(property => { + it(`should publish on get ${property}`, () => { + const urlObject = new url.URL('/path', 'https://www.datadoghq.com') + + const result = urlObject[property] + + sinon.assert.calledWithExactly(urlGetterChannelCb, { + urlObject, + result, + property + }, sinon.match.any) + }) + }) + }) + }) + + if (url.URL.parse) { // added in v22.1.0 + describe('url.URL.parse', () => { + it('should publish with input', () => { + const input = 'https://www.datadoghq.com' + const parsed = url.URL.parse(input) + + sinon.assert.calledOnceWithExactly(parseFinishedChannelCb, { + input, + parsed, + base: undefined, + isURL: true + }, sinon.match.any) + }) + + it('should publish with base and input', () => { + const result = new url.URL('/path', 'https://www.datadoghq.com') + + sinon.assert.calledOnceWithExactly(parseFinishedChannelCb, { + base: 'https://www.datadoghq.com', + input: '/path', + parsed: result, + isURL: true + }, sinon.match.any) + }) + }) + } + }) +}) diff --git a/packages/datadog-plugin-amqplib/src/consumer.js b/packages/datadog-plugin-amqplib/src/consumer.js index 92684e3f9dc..accd04568b1 100644 --- a/packages/datadog-plugin-amqplib/src/consumer.js +++ b/packages/datadog-plugin-amqplib/src/consumer.js @@ -9,17 +9,18 @@ class AmqplibConsumerPlugin extends 
ConsumerPlugin { static get id () { return 'amqplib' } static get operation () { return 'command' } - start ({ method, fields, message }) { + start ({ method, fields, message, queue }) { if (method !== 'basic.deliver' && method !== 'basic.get') return const childOf = extract(this.tracer, message) + const queueName = queue || fields.queue || fields.routingKey const span = this.startSpan({ childOf, resource: getResourceName(method, fields), type: 'worker', meta: { - 'amqp.queue': fields.queue, + 'amqp.queue': queueName, 'amqp.exchange': fields.exchange, 'amqp.routingKey': fields.routingKey, 'amqp.consumerTag': fields.consumerTag, @@ -32,10 +33,9 @@ class AmqplibConsumerPlugin extends ConsumerPlugin { this.config.dsmEnabled && message?.properties?.headers ) { const payloadSize = getAmqpMessageSize({ headers: message.properties.headers, content: message.content }) - const queue = fields.queue ? fields.queue : fields.routingKey this.tracer.decodeDataStreamsContext(message.properties.headers) this.tracer - .setCheckpoint(['direction:in', `topic:${queue}`, 'type:rabbitmq'], span, payloadSize) + .setCheckpoint(['direction:in', `topic:${queueName}`, 'type:rabbitmq'], span, payloadSize) } } } diff --git a/packages/datadog-plugin-amqplib/test/index.spec.js b/packages/datadog-plugin-amqplib/test/index.spec.js index d65a5c99338..3aa34145ffe 100644 --- a/packages/datadog-plugin-amqplib/test/index.spec.js +++ b/packages/datadog-plugin-amqplib/test/index.spec.js @@ -324,16 +324,22 @@ describe('Plugin', () => { it('Should emit DSM stats to the agent when sending a message', done => { agent.expectPipelineStats(dsmStats => { - let statsPointsReceived = 0 + let statsPointsReceived = [] // we should have 1 dsm stats points dsmStats.forEach((timeStatsBucket) => { if (timeStatsBucket && timeStatsBucket.Stats) { timeStatsBucket.Stats.forEach((statsBuckets) => { - statsPointsReceived += statsBuckets.Stats.length + statsPointsReceived = statsPointsReceived.concat(statsBuckets.Stats) }) } }) 
- expect(statsPointsReceived).to.be.at.least(1) + expect(statsPointsReceived.length).to.be.at.least(1) + expect(statsPointsReceived[0].EdgeTags).to.deep.equal([ + 'direction:out', + 'exchange:', + 'has_routing_key:true', + 'type:rabbitmq' + ]) expect(agent.dsmStatsExist(agent, expectedProducerHash)).to.equal(true) }, { timeoutMs: 10000 }).then(done, done) @@ -346,16 +352,18 @@ describe('Plugin', () => { it('Should emit DSM stats to the agent when receiving a message', done => { agent.expectPipelineStats(dsmStats => { - let statsPointsReceived = 0 + let statsPointsReceived = [] // we should have 2 dsm stats points dsmStats.forEach((timeStatsBucket) => { if (timeStatsBucket && timeStatsBucket.Stats) { timeStatsBucket.Stats.forEach((statsBuckets) => { - statsPointsReceived += statsBuckets.Stats.length + statsPointsReceived = statsPointsReceived.concat(statsBuckets.Stats) }) } }) - expect(statsPointsReceived).to.be.at.least(1) + expect(statsPointsReceived.length).to.be.at.least(1) + expect(statsPointsReceived[0].EdgeTags).to.deep.equal( + ['direction:in', 'topic:testDSM', 'type:rabbitmq']) expect(agent.dsmStatsExist(agent, expectedConsumerHash)).to.equal(true) }, { timeoutMs: 10000 }).then(done, done) @@ -368,6 +376,60 @@ describe('Plugin', () => { }) }) + it('Should emit DSM stats to the agent when sending another message', done => { + agent.expectPipelineStats(dsmStats => { + let statsPointsReceived = [] + // we should have 1 dsm stats points + dsmStats.forEach((timeStatsBucket) => { + if (timeStatsBucket && timeStatsBucket.Stats) { + timeStatsBucket.Stats.forEach((statsBuckets) => { + statsPointsReceived = statsPointsReceived.concat(statsBuckets.Stats) + }) + } + }) + expect(statsPointsReceived.length).to.be.at.least(1) + expect(statsPointsReceived[0].EdgeTags).to.deep.equal([ + 'direction:out', + 'exchange:', + 'has_routing_key:true', + 'type:rabbitmq' + ]) + expect(agent.dsmStatsExist(agent, expectedProducerHash)).to.equal(true) + }, { timeoutMs: 10000 
}).then(done, done) + + channel.assertQueue('testDSM', {}, (err, ok) => { + if (err) return done(err) + + channel.sendToQueue(ok.queue, Buffer.from('DSM pathway test')) + }) + }) + + it('Should emit DSM stats to the agent when receiving a message with get', done => { + agent.expectPipelineStats(dsmStats => { + let statsPointsReceived = [] + // we should have 2 dsm stats points + dsmStats.forEach((timeStatsBucket) => { + if (timeStatsBucket && timeStatsBucket.Stats) { + timeStatsBucket.Stats.forEach((statsBuckets) => { + statsPointsReceived = statsPointsReceived.concat(statsBuckets.Stats) + }) + } + }) + expect(statsPointsReceived.length).to.be.at.least(1) + expect(statsPointsReceived[0].EdgeTags).to.deep.equal( + ['direction:in', 'topic:testDSM', 'type:rabbitmq']) + expect(agent.dsmStatsExist(agent, expectedConsumerHash)).to.equal(true) + }, { timeoutMs: 10000 }).then(done, done) + + channel.assertQueue('testDSM', {}, (err, ok) => { + if (err) return done(err) + + channel.get(ok.queue, {}, (err, ok) => { + if (err) done(err) + }) + }) + }) + it('Should set pathway hash tag on a span when producing', (done) => { channel.assertQueue('testDSM', {}, (err, ok) => { if (err) return done(err) diff --git a/packages/datadog-plugin-aws-sdk/src/services/eventbridge.js b/packages/datadog-plugin-aws-sdk/src/services/eventbridge.js index 9309411564a..b316f75e6be 100644 --- a/packages/datadog-plugin-aws-sdk/src/services/eventbridge.js +++ b/packages/datadog-plugin-aws-sdk/src/services/eventbridge.js @@ -4,6 +4,7 @@ const BaseAwsSdkPlugin = require('../base') class EventBridge extends BaseAwsSdkPlugin { static get id () { return 'eventbridge' } + static get isPayloadReporter () { return true } generateTags (params, operation, response) { if (!params || !params.source) return {} diff --git a/packages/datadog-plugin-aws-sdk/src/services/kinesis.js b/packages/datadog-plugin-aws-sdk/src/services/kinesis.js index 60802bfc448..dd139e5a608 100644 --- 
a/packages/datadog-plugin-aws-sdk/src/services/kinesis.js +++ b/packages/datadog-plugin-aws-sdk/src/services/kinesis.js @@ -10,6 +10,7 @@ const { storage } = require('../../../datadog-core') class Kinesis extends BaseAwsSdkPlugin { static get id () { return 'kinesis' } static get peerServicePrecursors () { return ['streamname'] } + static get isPayloadReporter () { return true } constructor (...args) { super(...args) diff --git a/packages/datadog-plugin-aws-sdk/src/services/s3.js b/packages/datadog-plugin-aws-sdk/src/services/s3.js index c306c7ba0a8..0b6da57f3c9 100644 --- a/packages/datadog-plugin-aws-sdk/src/services/s3.js +++ b/packages/datadog-plugin-aws-sdk/src/services/s3.js @@ -5,6 +5,7 @@ const BaseAwsSdkPlugin = require('../base') class S3 extends BaseAwsSdkPlugin { static get id () { return 's3' } static get peerServicePrecursors () { return ['bucketname'] } + static get isPayloadReporter () { return true } generateTags (params, operation, response) { const tags = {} diff --git a/packages/datadog-plugin-aws-sdk/src/services/sqs.js b/packages/datadog-plugin-aws-sdk/src/services/sqs.js index 54a3e7e756c..38a5d03c775 100644 --- a/packages/datadog-plugin-aws-sdk/src/services/sqs.js +++ b/packages/datadog-plugin-aws-sdk/src/services/sqs.js @@ -9,6 +9,7 @@ const { DsmPathwayCodec } = require('../../../dd-trace/src/datastreams/pathway') class Sqs extends BaseAwsSdkPlugin { static get id () { return 'sqs' } static get peerServicePrecursors () { return ['queuename'] } + static get isPayloadReporter () { return true } constructor (...args) { super(...args) diff --git a/packages/datadog-plugin-aws-sdk/test/aws-sdk.spec.js b/packages/datadog-plugin-aws-sdk/test/aws-sdk.spec.js index 4f68f5fbf94..848b00855d4 100644 --- a/packages/datadog-plugin-aws-sdk/test/aws-sdk.spec.js +++ b/packages/datadog-plugin-aws-sdk/test/aws-sdk.spec.js @@ -114,28 +114,6 @@ describe('Plugin', () => { s3.listBuckets({}, e => e && done(e)) }) - // different versions of aws-sdk use different 
casings and different AWS headers - it('should include tracing headers and not cause a 403 error', (done) => { - const HttpClientPlugin = require('../../datadog-plugin-http/src/client.js') - const spy = sinon.spy(HttpClientPlugin.prototype, 'bindStart') - agent.use(traces => { - const headers = new Set( - Object.keys(spy.firstCall.firstArg.args.options.headers) - .map(x => x.toLowerCase()) - ) - spy.restore() - - expect(headers).to.include('authorization') - expect(headers).to.include('x-amz-date') - expect(headers).to.include('x-datadog-trace-id') - expect(headers).to.include('x-datadog-parent-id') - expect(headers).to.include('x-datadog-sampling-priority') - expect(headers).to.include('x-datadog-tags') - }).then(done, done) - - s3.listBuckets({}, e => e && done(e)) - }) - it('should mark error responses', (done) => { let error diff --git a/packages/datadog-plugin-aws-sdk/test/eventbridge.spec.js b/packages/datadog-plugin-aws-sdk/test/eventbridge.spec.js index fbe77151d4c..3f65acdab0b 100644 --- a/packages/datadog-plugin-aws-sdk/test/eventbridge.spec.js +++ b/packages/datadog-plugin-aws-sdk/test/eventbridge.spec.js @@ -27,6 +27,7 @@ describe('EventBridge', () => { _traceFlags: { sampled: 1 }, + _baggageItems: {}, 'x-datadog-trace-id': traceId, 'x-datadog-parent-id': parentId, 'x-datadog-sampling-priority': '1', diff --git a/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/package.json b/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/package.json index 07b0ac311ee..f17f97669ab 100644 --- a/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/package.json +++ b/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/package.json @@ -7,7 +7,7 @@ "start": "func start" }, "dependencies": { - "@azure/functions": "^4.0.0" + "@azure/functions": "^4.6.0" }, "devDependencies": { "azure-functions-core-tools": "^4.x" diff --git 
a/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/yarn.lock b/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/yarn.lock index 98c420c8953..bceddf8fcad 100644 --- a/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/yarn.lock +++ b/packages/datadog-plugin-azure-functions/test/integration-test/fixtures/yarn.lock @@ -2,12 +2,12 @@ # yarn lockfile v1 -"@azure/functions@^4.0.0": - version "4.5.1" - resolved "https://registry.yarnpkg.com/@azure/functions/-/functions-4.5.1.tgz#70d1a99d335af87579a55d3c149ef1ae77da0a66" - integrity sha512-ikiw1IrM2W9NlQM3XazcX+4Sq3XAjZi4eeG22B5InKC2x5i7MatGF2S/Gn1ACZ+fEInwu+Ru9J8DlnBv1/hIvg== +"@azure/functions@^4.6.0": + version "4.6.0" + resolved "https://registry.yarnpkg.com/@azure/functions/-/functions-4.6.0.tgz#eee9ca945b8a2f2d0748c28006e057178cd5f8c9" + integrity sha512-vGq9jXlgrJ3KaI8bepgfpk26zVY8vFZsQukF85qjjKTAR90eFOOBNaa+mc/0ViDY2lcdrU2fL/o1pQyZUtTDsw== dependencies: - cookie "^0.6.0" + cookie "^0.7.0" long "^4.0.0" undici "^5.13.0" @@ -92,10 +92,10 @@ color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== -cookie@^0.6.0: - version "0.6.0" - resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.6.0.tgz#2798b04b071b0ecbff0dbb62a505a8efa4e19051" - integrity sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw== +cookie@^0.7.0: + version "0.7.2" + resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.7.2.tgz#556369c472a2ba910f2979891b526b3436237ed7" + integrity sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w== debug@4, debug@^4.1.1: version "4.3.7" diff --git a/packages/datadog-plugin-fastify/src/code_origin.js b/packages/datadog-plugin-fastify/src/code_origin.js index 3e6f58d5624..6c9ddc7b028 
100644 --- a/packages/datadog-plugin-fastify/src/code_origin.js +++ b/packages/datadog-plugin-fastify/src/code_origin.js @@ -1,6 +1,6 @@ 'use strict' -const { entryTag } = require('../../datadog-code-origin') +const { entryTags } = require('../../datadog-code-origin') const Plugin = require('../../dd-trace/src/plugins/plugin') const web = require('../../dd-trace/src/plugins/util/web') @@ -23,7 +23,7 @@ class FastifyCodeOriginForSpansPlugin extends Plugin { this.addSub('apm:fastify:route:added', ({ routeOptions, onRoute }) => { if (!routeOptions.config) routeOptions.config = {} - routeOptions.config[kCodeOriginForSpansTagsSym] = entryTag(onRoute) + routeOptions.config[kCodeOriginForSpansTagsSym] = entryTags(onRoute) }) } } diff --git a/packages/datadog-plugin-fastify/test/code_origin.spec.js b/packages/datadog-plugin-fastify/test/code_origin.spec.js index 711c2ffff6c..18f591dc6b9 100644 --- a/packages/datadog-plugin-fastify/test/code_origin.spec.js +++ b/packages/datadog-plugin-fastify/test/code_origin.spec.js @@ -3,6 +3,7 @@ const axios = require('axios') const semver = require('semver') const agent = require('../../dd-trace/test/plugins/agent') +const { getNextLineNumber } = require('../../dd-trace/test/plugins/helpers') const { NODE_MAJOR } = require('../../../version') const host = 'localhost' @@ -49,13 +50,13 @@ describe('Plugin', () => { // Wrap in a named function to have at least one frame with a function name function wrapperFunction () { - routeRegisterLine = getNextLineNumber() + routeRegisterLine = String(getNextLineNumber()) app.get('/user', function userHandler (request, reply) { reply.send() }) } - const callWrapperLine = getNextLineNumber() + const callWrapperLine = String(getNextLineNumber()) wrapperFunction() app.listen(() => { @@ -95,7 +96,7 @@ describe('Plugin', () => { let routeRegisterLine app.register(function v1Handler (app, opts, done) { - routeRegisterLine = getNextLineNumber() + routeRegisterLine = String(getNextLineNumber()) 
app.get('/user', function userHandler (request, reply) { reply.send() }) @@ -134,7 +135,7 @@ describe('Plugin', () => { next() }) - const routeRegisterLine = getNextLineNumber() + const routeRegisterLine = String(getNextLineNumber()) app.get('/user', function userHandler (request, reply) { reply.send() }) @@ -170,7 +171,7 @@ describe('Plugin', () => { // number of where the route handler is defined. However, this might not be the right choice and it might be // better to point to the middleware. it.skip('should point to middleware if middleware responds early', function testCase (done) { - const middlewareRegisterLine = getNextLineNumber() + const middlewareRegisterLine = String(getNextLineNumber()) app.use(function middleware (req, res, next) { res.end() }) @@ -210,7 +211,3 @@ describe('Plugin', () => { }) }) }) - -function getNextLineNumber () { - return String(Number(new Error().stack.split('\n')[2].match(/:(\d+):/)[1]) + 1) -} diff --git a/packages/datadog-plugin-fetch/test/index.spec.js b/packages/datadog-plugin-fetch/test/index.spec.js index b469f4a9722..1d322de04a4 100644 --- a/packages/datadog-plugin-fetch/test/index.spec.js +++ b/packages/datadog-plugin-fetch/test/index.spec.js @@ -215,6 +215,102 @@ describe('Plugin', () => { }) }) + it('should skip injecting if the Authorization header contains an AWS signature', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + fetch(`http://localhost:${port}/`, { + headers: { + Authorization: 'AWS4-HMAC-SHA256 ...' 
+ } + }) + }) + }) + + it('should skip injecting if one of the Authorization headers contains an AWS signature', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + fetch(`http://localhost:${port}/`, { + headers: { + Authorization: ['AWS4-HMAC-SHA256 ...'] + } + }) + }) + }) + + it('should skip injecting if the X-Amz-Signature header is set', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + fetch(`http://localhost:${port}/`, { + headers: { + 'X-Amz-Signature': 'abc123' + } + }) + }) + }) + + it('should skip injecting if the X-Amz-Signature query param is set', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + fetch(`http://localhost:${port}/?X-Amz-Signature=abc123`) + }) + }) + it('should handle connection errors', done => { let error diff --git a/packages/datadog-plugin-google-cloud-pubsub/src/consumer.js b/packages/datadog-plugin-google-cloud-pubsub/src/consumer.js index 3a330ad4c3a..84c4122ec57 100644 --- a/packages/datadog-plugin-google-cloud-pubsub/src/consumer.js +++ b/packages/datadog-plugin-google-cloud-pubsub/src/consumer.js @@ -1,5 +1,6 @@ 'use strict' +const { getMessageSize } = require('../../dd-trace/src/datastreams/processor') const ConsumerPlugin = require('../../dd-trace/src/plugins/consumer') class 
GoogleCloudPubsubConsumerPlugin extends ConsumerPlugin { @@ -11,7 +12,7 @@ class GoogleCloudPubsubConsumerPlugin extends ConsumerPlugin { const topic = subscription.metadata && subscription.metadata.topic const childOf = this.tracer.extract('text_map', message.attributes) || null - this.startSpan({ + const span = this.startSpan({ childOf, resource: topic, type: 'worker', @@ -23,6 +24,12 @@ class GoogleCloudPubsubConsumerPlugin extends ConsumerPlugin { 'pubsub.ack': 0 } }) + if (this.config.dsmEnabled && message?.attributes) { + const payloadSize = getMessageSize(message) + this.tracer.decodeDataStreamsContext(message.attributes) + this.tracer + .setCheckpoint(['direction:in', `topic:${topic}`, 'type:google-pubsub'], span, payloadSize) + } } finish (message) { diff --git a/packages/datadog-plugin-google-cloud-pubsub/src/producer.js b/packages/datadog-plugin-google-cloud-pubsub/src/producer.js index a34d6bfacd8..b6261ee85b6 100644 --- a/packages/datadog-plugin-google-cloud-pubsub/src/producer.js +++ b/packages/datadog-plugin-google-cloud-pubsub/src/producer.js @@ -1,6 +1,8 @@ 'use strict' const ProducerPlugin = require('../../dd-trace/src/plugins/producer') +const { DsmPathwayCodec } = require('../../dd-trace/src/datastreams/pathway') +const { getHeadersSize } = require('../../dd-trace/src/datastreams/processor') class GoogleCloudPubsubProducerPlugin extends ProducerPlugin { static get id () { return 'google-cloud-pubsub' } @@ -25,6 +27,12 @@ class GoogleCloudPubsubProducerPlugin extends ProducerPlugin { msg.attributes = {} } this.tracer.inject(span, 'text_map', msg.attributes) + if (this.config.dsmEnabled) { + const payloadSize = getHeadersSize(msg) + const dataStreamsContext = this.tracer + .setCheckpoint(['direction:out', `topic:${topic}`, 'type:google-pubsub'], span, payloadSize) + DsmPathwayCodec.encode(dataStreamsContext, msg.attributes) + } } } } diff --git a/packages/datadog-plugin-google-cloud-pubsub/test/index.spec.js 
b/packages/datadog-plugin-google-cloud-pubsub/test/index.spec.js index 89a0c5f03b8..80bc5f9509d 100644 --- a/packages/datadog-plugin-google-cloud-pubsub/test/index.spec.js +++ b/packages/datadog-plugin-google-cloud-pubsub/test/index.spec.js @@ -6,9 +6,12 @@ const id = require('../../dd-trace/src/id') const { ERROR_MESSAGE, ERROR_TYPE, ERROR_STACK } = require('../../dd-trace/src/constants') const { expectedSchema, rawExpectedSchema } = require('./naming') +const { computePathwayHash } = require('../../dd-trace/src/datastreams/pathway') +const { ENTRY_PARENT_HASH, DataStreamsProcessor } = require('../../dd-trace/src/datastreams/processor') // The roundtrip to the pubsub emulator takes time. Sometimes a *long* time. const TIMEOUT = 30000 +const dsmTopicName = 'dsm-topic' describe('Plugin', () => { let tracer @@ -18,6 +21,7 @@ describe('Plugin', () => { before(() => { process.env.PUBSUB_EMULATOR_HOST = 'localhost:8081' + process.env.DD_DATA_STREAMS_ENABLED = true }) after(() => { @@ -34,10 +38,12 @@ describe('Plugin', () => { let resource let v1 let gax + let expectedProducerHash + let expectedConsumerHash describe('without configuration', () => { beforeEach(() => { - return agent.load('google-cloud-pubsub') + return agent.load('google-cloud-pubsub', { dsmEnabled: false }) }) beforeEach(() => { @@ -296,7 +302,8 @@ describe('Plugin', () => { describe('with configuration', () => { beforeEach(() => { return agent.load('google-cloud-pubsub', { - service: 'a_test_service' + service: 'a_test_service', + dsmEnabled: false }) }) @@ -322,6 +329,113 @@ describe('Plugin', () => { }) }) + describe('data stream monitoring', () => { + let dsmTopic + let sub + let consume + + beforeEach(() => { + return agent.load('google-cloud-pubsub', { + dsmEnabled: true + }) + }) + + before(async () => { + const { PubSub } = require(`../../../versions/@google-cloud/pubsub@${version}`).get() + project = getProjectId() + resource = `projects/${project}/topics/${dsmTopicName}` + pubsub = new 
PubSub({ projectId: project }) + tracer.use('google-cloud-pubsub', { dsmEnabled: true }) + + dsmTopic = await pubsub.createTopic(dsmTopicName) + dsmTopic = dsmTopic[0] + sub = await dsmTopic.createSubscription('DSM') + sub = sub[0] + consume = function (cb) { + sub.on('message', cb) + } + + const dsmFullTopic = `projects/${project}/topics/${dsmTopicName}` + + expectedProducerHash = computePathwayHash( + 'test', + 'tester', + ['direction:out', 'topic:' + dsmFullTopic, 'type:google-pubsub'], + ENTRY_PARENT_HASH + ) + expectedConsumerHash = computePathwayHash( + 'test', + 'tester', + ['direction:in', 'topic:' + dsmFullTopic, 'type:google-pubsub'], + expectedProducerHash + ) + }) + + describe('should set a DSM checkpoint', () => { + it('on produce', async () => { + await publish(dsmTopic, { data: Buffer.from('DSM produce checkpoint') }) + + agent.expectPipelineStats(dsmStats => { + let statsPointsReceived = 0 + // we should have 1 dsm stats points + dsmStats.forEach((timeStatsBucket) => { + if (timeStatsBucket && timeStatsBucket.Stats) { + timeStatsBucket.Stats.forEach((statsBuckets) => { + statsPointsReceived += statsBuckets.Stats.length + }) + } + }) + expect(statsPointsReceived).to.be.at.least(1) + expect(agent.dsmStatsExist(agent, expectedProducerHash.readBigUInt64BE(0).toString())).to.equal(true) + }, { timeoutMs: TIMEOUT }) + }) + + it('on consume', async () => { + await publish(dsmTopic, { data: Buffer.from('DSM consume checkpoint') }) + await consume(async () => { + agent.expectPipelineStats(dsmStats => { + let statsPointsReceived = 0 + // we should have 2 dsm stats points + dsmStats.forEach((timeStatsBucket) => { + if (timeStatsBucket && timeStatsBucket.Stats) { + timeStatsBucket.Stats.forEach((statsBuckets) => { + statsPointsReceived += statsBuckets.Stats.length + }) + } + }) + expect(statsPointsReceived).to.be.at.least(2) + expect(agent.dsmStatsExist(agent, expectedConsumerHash.readBigUInt64BE(0).toString())).to.equal(true) + }, { timeoutMs: TIMEOUT }) + }) 
+ }) + }) + + describe('it should set a message payload size', () => { + let recordCheckpointSpy + + beforeEach(() => { + recordCheckpointSpy = sinon.spy(DataStreamsProcessor.prototype, 'recordCheckpoint') + }) + + afterEach(() => { + DataStreamsProcessor.prototype.recordCheckpoint.restore() + }) + + it('when producing a message', async () => { + await publish(dsmTopic, { data: Buffer.from('DSM produce payload size') }) + expect(recordCheckpointSpy.args[0][0].hasOwnProperty('payloadSize')) + }) + + it('when consuming a message', async () => { + await publish(dsmTopic, { data: Buffer.from('DSM consume payload size') }) + + await consume(async () => { + expect(recordCheckpointSpy.args[0][0].hasOwnProperty('payloadSize')) + }) + }) + }) + }) + function expectSpanWithDefaults (expected) { const prefixedResource = [expected.meta['pubsub.method'], resource].filter(x => x).join(' ') const service = expected.meta['pubsub.method'] ? 'test-pubsub' : 'test' diff --git a/packages/datadog-plugin-http/src/client.js b/packages/datadog-plugin-http/src/client.js index 55a025f4970..d4c105d2508 100644 --- a/packages/datadog-plugin-http/src/client.js +++ b/packages/datadog-plugin-http/src/client.js @@ -58,7 +58,7 @@ class HttpClientPlugin extends ClientPlugin { span._spanContext._trace.record = false } - if (this.config.propagationFilter(uri)) { + if (this.shouldInjectTraceHeaders(options, uri)) { this.tracer.inject(span, HTTP_HEADERS, options.headers) } @@ -71,6 +71,18 @@ class HttpClientPlugin extends ClientPlugin { return message.currentStore } + shouldInjectTraceHeaders (options, uri) { + if (hasAmazonSignature(options) && !this.config.enablePropagationWithAmazonHeaders) { + return false + } + + if (!this.config.propagationFilter(uri)) { + return false + } + + return true + } + bindAsyncStart ({ parentStore }) { return parentStore } @@ -200,6 +212,31 @@ function getHooks (config) { return { request } } +function hasAmazonSignature (options) { + if (!options) { + return false + } + 
+ if (options.headers) { + const headers = Object.keys(options.headers) + .reduce((prev, next) => Object.assign(prev, { + [next.toLowerCase()]: options.headers[next] + }), {}) + + if (headers['x-amz-signature']) { + return true + } + + if ([].concat(headers.authorization).some(startsWith('AWS4-HMAC-SHA256'))) { + return true + } + } + + const search = options.search || options.path + + return search && search.toLowerCase().indexOf('x-amz-signature=') !== -1 +} + function extractSessionDetails (options) { if (typeof options === 'string') { return new URL(options).host @@ -211,4 +248,8 @@ function extractSessionDetails (options) { return { host, port } } +function startsWith (searchString) { + return value => String(value).startsWith(searchString) +} + module.exports = HttpClientPlugin diff --git a/packages/datadog-plugin-http/test/client.spec.js b/packages/datadog-plugin-http/test/client.spec.js index 268aff9b238..42f4c8436f8 100644 --- a/packages/datadog-plugin-http/test/client.spec.js +++ b/packages/datadog-plugin-http/test/client.spec.js @@ -446,6 +446,116 @@ describe('Plugin', () => { }) }) + it('should skip injecting if the Authorization header contains an AWS signature', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + const req = http.request({ + port, + headers: { + Authorization: 'AWS4-HMAC-SHA256 ...' 
+ } + }) + + req.end() + }) + }) + + it('should skip injecting if one of the Authorization headers contains an AWS signature', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + const req = http.request({ + port, + headers: { + Authorization: ['AWS4-HMAC-SHA256 ...'] + } + }) + + req.end() + }) + }) + + it('should skip injecting if the X-Amz-Signature header is set', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + const req = http.request({ + port, + headers: { + 'X-Amz-Signature': 'abc123' + } + }) + + req.end() + }) + }) + + it('should skip injecting if the X-Amz-Signature query param is set', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.undefined + expect(req.get('x-datadog-parent-id')).to.be.undefined + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + const req = http.request({ + port, + path: '/?X-Amz-Signature=abc123' + }) + + req.end() + }) + }) + it('should run the callback in the parent context', done => { const app = express() @@ -983,6 +1093,50 @@ describe('Plugin', () => { }) }) + describe('with config enablePropagationWithAmazonHeaders enabled', () => { + let config + + beforeEach(() => { + config = { + enablePropagationWithAmazonHeaders: true + } + + return agent.load('http', config) + .then(() => { + http = require(pluginToBeLoaded) + express = require('express') + }) + }) + + it('should inject tracing header into 
AWS signed request', done => { + const app = express() + + app.get('/', (req, res) => { + try { + expect(req.get('x-datadog-trace-id')).to.be.a('string') + expect(req.get('x-datadog-parent-id')).to.be.a('string') + + res.status(200).send() + + done() + } catch (e) { + done(e) + } + }) + + appListener = server(app, port => { + const req = http.request({ + port, + headers: { + Authorization: 'AWS4-HMAC-SHA256 ...' + } + }) + + req.end() + }) + }) + }) + describe('with validateStatus configuration', () => { let config diff --git a/packages/datadog-plugin-http/test/code_origin.spec.js b/packages/datadog-plugin-http/test/code_origin.spec.js new file mode 100644 index 00000000000..4bb1a9003e0 --- /dev/null +++ b/packages/datadog-plugin-http/test/code_origin.spec.js @@ -0,0 +1,63 @@ +'use strict' + +const agent = require('../../dd-trace/test/plugins/agent') + +describe('Plugin', () => { + describe('http', () => { + describe('Code Origin for Spans', () => { + before(() => { + // Needed when this spec file run together with other spec files, in which case the agent config is not + // re-loaded unless the existing agent is wiped first. And we need the agent config to be re-loaded in order to + // enable Code Origin for Spans. + agent.wipe() + }) + + beforeEach(async () => { + return agent.load('http', { server: false }, { codeOriginForSpans: { enabled: true } }) + }) + + afterEach(() => { + return agent.close({ ritmReset: false }) + }) + + it('should add code_origin tags for outbound requests', done => { + server((port) => { + const http = require('http') + + agent + .use(traces => { + const span = traces[0][0] + expect(span.meta).to.have.property('_dd.code_origin.type', 'exit') + + // Just validate that frame 0 tags are present. The detailed validation is performed in a different test. 
+ expect(span.meta).to.have.property('_dd.code_origin.frames.0.file') + expect(span.meta).to.have.property('_dd.code_origin.frames.0.line') + expect(span.meta).to.have.property('_dd.code_origin.frames.0.column') + expect(span.meta).to.have.property('_dd.code_origin.frames.0.method') + expect(span.meta).to.have.property('_dd.code_origin.frames.0.type') + }) + .then(done) + .catch(done) + + const req = http.request(`http://localhost:${port}/`, res => { + res.resume() + }) + + req.end() + }) + }) + }) + }) +}) + +function server (callback) { + const http = require('http') + + const server = http.createServer((req, res) => { + res.end() + }) + + server.listen(() => { + callback(server.address().port) + }) +} diff --git a/packages/datadog-plugin-http2/src/client.js b/packages/datadog-plugin-http2/src/client.js index 3f8d996fcd3..296f1161e59 100644 --- a/packages/datadog-plugin-http2/src/client.js +++ b/packages/datadog-plugin-http2/src/client.js @@ -62,7 +62,9 @@ class Http2ClientPlugin extends ClientPlugin { addHeaderTags(span, headers, HTTP_REQUEST_HEADERS, this.config) - this.tracer.inject(span, HTTP_HEADERS, headers) + if (!hasAmazonSignature(headers, path)) { + this.tracer.inject(span, HTTP_HEADERS, headers) + } message.parentStore = store message.currentStore = { ...store, span } @@ -132,6 +134,29 @@ function extractSessionDetails (authority, options) { return { protocol, port, host } } +function hasAmazonSignature (headers, path) { + if (headers) { + headers = Object.keys(headers) + .reduce((prev, next) => Object.assign(prev, { + [next.toLowerCase()]: headers[next] + }), {}) + + if (headers['x-amz-signature']) { + return true + } + + if ([].concat(headers.authorization).some(startsWith('AWS4-HMAC-SHA256'))) { + return true + } + } + + return path && path.toLowerCase().indexOf('x-amz-signature=') !== -1 +} + +function startsWith (searchString) { + return value => String(value).startsWith(searchString) +} + function getStatusValidator (config) { if (typeof 
config.validateStatus === 'function') { return config.validateStatus diff --git a/packages/datadog-plugin-http2/test/client.spec.js b/packages/datadog-plugin-http2/test/client.spec.js index cfdedcde489..f8d44f3ac0b 100644 --- a/packages/datadog-plugin-http2/test/client.spec.js +++ b/packages/datadog-plugin-http2/test/client.spec.js @@ -365,6 +365,131 @@ describe('Plugin', () => { }) }) + it('should skip injecting if the Authorization header contains an AWS signature', done => { + const app = (stream, headers) => { + try { + expect(headers['x-datadog-trace-id']).to.be.undefined + expect(headers['x-datadog-parent-id']).to.be.undefined + + stream.respond({ + ':status': 200 + }) + stream.end() + + done() + } catch (e) { + done(e) + } + } + + appListener = server(app, port => { + const headers = { + Authorization: 'AWS4-HMAC-SHA256 ...' + } + const client = http2 + .connect(`${protocol}://localhost:${port}`) + .on('error', done) + + const req = client.request(headers) + req.on('error', done) + + req.end() + }) + }) + + it('should skip injecting if one of the Authorization headers contains an AWS signature', done => { + const app = (stream, headers) => { + try { + expect(headers['x-datadog-trace-id']).to.be.undefined + expect(headers['x-datadog-parent-id']).to.be.undefined + + stream.respond({ + ':status': 200 + }) + stream.end() + + done() + } catch (e) { + done(e) + } + } + + appListener = server(app, port => { + const headers = { + Authorization: ['AWS4-HMAC-SHA256 ...'] + } + const client = http2 + .connect(`${protocol}://localhost:${port}`) + .on('error', done) + + const req = client.request(headers) + req.on('error', done) + + req.end() + }) + }) + + it('should skip injecting if the X-Amz-Signature header is set', done => { + const app = (stream, headers) => { + try { + expect(headers['x-datadog-trace-id']).to.be.undefined + expect(headers['x-datadog-parent-id']).to.be.undefined + + stream.respond({ + ':status': 200 + }) + stream.end() + + done() + } catch (e) { + 
done(e) + } + } + + appListener = server(app, port => { + const headers = { + 'X-Amz-Signature': 'abc123' + } + const client = http2 + .connect(`${protocol}://localhost:${port}`) + .on('error', done) + + const req = client.request(headers) + req.on('error', done) + + req.end() + }) + }) + + it('should skip injecting if the X-Amz-Signature query param is set', done => { + const app = (stream, headers) => { + try { + expect(headers['x-datadog-trace-id']).to.be.undefined + expect(headers['x-datadog-parent-id']).to.be.undefined + + stream.respond({ + ':status': 200 + }) + stream.end() + + done() + } catch (e) { + done(e) + } + } + + appListener = server(app, port => { + const client = http2 + .connect(`${protocol}://localhost:${port}`) + .on('error', done) + + const req = client.request({ ':path': '/?X-Amz-Signature=abc123' }) + req.on('error', done) + + req.end() + }) + }) + it('should run the callback in the parent context', done => { const app = (stream, headers) => { stream.respond({ diff --git a/packages/datadog-plugin-kafkajs/src/batch-consumer.js b/packages/datadog-plugin-kafkajs/src/batch-consumer.js index 8415b037644..e0228a018c2 100644 --- a/packages/datadog-plugin-kafkajs/src/batch-consumer.js +++ b/packages/datadog-plugin-kafkajs/src/batch-consumer.js @@ -5,14 +5,17 @@ class KafkajsBatchConsumerPlugin extends ConsumerPlugin { static get id () { return 'kafkajs' } static get operation () { return 'consume-batch' } - start ({ topic, partition, messages, groupId }) { + start ({ topic, partition, messages, groupId, clusterId }) { if (!this.config.dsmEnabled) return for (const message of messages) { if (!message || !message.headers) continue const payloadSize = getMessageSize(message) this.tracer.decodeDataStreamsContext(message.headers) - this.tracer - .setCheckpoint(['direction:in', `group:${groupId}`, `topic:${topic}`, 'type:kafka'], null, payloadSize) + const edgeTags = ['direction:in', `group:${groupId}`, `topic:${topic}`, 'type:kafka'] + if (clusterId) { + 
edgeTags.push(`kafka_cluster_id:${clusterId}`) + } + this.tracer.setCheckpoint(edgeTags, null, payloadSize) } } } diff --git a/packages/datadog-plugin-kafkajs/src/consumer.js b/packages/datadog-plugin-kafkajs/src/consumer.js index 84b6a02fdda..ee04c5eb60c 100644 --- a/packages/datadog-plugin-kafkajs/src/consumer.js +++ b/packages/datadog-plugin-kafkajs/src/consumer.js @@ -62,7 +62,7 @@ class KafkajsConsumerPlugin extends ConsumerPlugin { } } - start ({ topic, partition, message, groupId }) { + start ({ topic, partition, message, groupId, clusterId }) { const childOf = extract(this.tracer, message.headers) const span = this.startSpan({ childOf, @@ -71,7 +71,8 @@ class KafkajsConsumerPlugin extends ConsumerPlugin { meta: { component: 'kafkajs', 'kafka.topic': topic, - 'kafka.message.offset': message.offset + 'kafka.message.offset': message.offset, + 'kafka.cluster_id': clusterId }, metrics: { 'kafka.partition': partition @@ -80,8 +81,11 @@ class KafkajsConsumerPlugin extends ConsumerPlugin { if (this.config.dsmEnabled && message?.headers) { const payloadSize = getMessageSize(message) this.tracer.decodeDataStreamsContext(message.headers) - this.tracer - .setCheckpoint(['direction:in', `group:${groupId}`, `topic:${topic}`, 'type:kafka'], span, payloadSize) + const edgeTags = ['direction:in', `group:${groupId}`, `topic:${topic}`, 'type:kafka'] + if (clusterId) { + edgeTags.push(`kafka_cluster_id:${clusterId}`) + } + this.tracer.setCheckpoint(edgeTags, span, payloadSize) } if (afterStartCh.hasSubscribers) { diff --git a/packages/datadog-plugin-kafkajs/src/producer.js b/packages/datadog-plugin-kafkajs/src/producer.js index 7b9aff95310..aa12357b4cf 100644 --- a/packages/datadog-plugin-kafkajs/src/producer.js +++ b/packages/datadog-plugin-kafkajs/src/producer.js @@ -66,12 +66,13 @@ class KafkajsProducerPlugin extends ProducerPlugin { } } - start ({ topic, messages, bootstrapServers }) { + start ({ topic, messages, bootstrapServers, clusterId }) { const span = 
this.startSpan({ resource: topic, meta: { component: 'kafkajs', - 'kafka.topic': topic + 'kafka.topic': topic, + 'kafka.cluster_id': clusterId }, metrics: { 'kafka.batch_size': messages.length @@ -85,8 +86,13 @@ class KafkajsProducerPlugin extends ProducerPlugin { this.tracer.inject(span, 'text_map', message.headers) if (this.config.dsmEnabled) { const payloadSize = getMessageSize(message) - const dataStreamsContext = this.tracer - .setCheckpoint(['direction:out', `topic:${topic}`, 'type:kafka'], span, payloadSize) + const edgeTags = ['direction:out', `topic:${topic}`, 'type:kafka'] + + if (clusterId) { + edgeTags.push(`kafka_cluster_id:${clusterId}`) + } + + const dataStreamsContext = this.tracer.setCheckpoint(edgeTags, span, payloadSize) DsmPathwayCodec.encode(dataStreamsContext, message.headers) } } diff --git a/packages/datadog-plugin-kafkajs/test/index.spec.js b/packages/datadog-plugin-kafkajs/test/index.spec.js index 3df303a95cf..f67279bdd9f 100644 --- a/packages/datadog-plugin-kafkajs/test/index.spec.js +++ b/packages/datadog-plugin-kafkajs/test/index.spec.js @@ -13,18 +13,22 @@ const { computePathwayHash } = require('../../dd-trace/src/datastreams/pathway') const { ENTRY_PARENT_HASH, DataStreamsProcessor } = require('../../dd-trace/src/datastreams/processor') const testTopic = 'test-topic' -const expectedProducerHash = computePathwayHash( - 'test', - 'tester', - ['direction:out', 'topic:' + testTopic, 'type:kafka'], - ENTRY_PARENT_HASH -) -const expectedConsumerHash = computePathwayHash( - 'test', - 'tester', - ['direction:in', 'group:test-group', 'topic:' + testTopic, 'type:kafka'], - expectedProducerHash -) +const testKafkaClusterId = '5L6g3nShT-eMCtK--X86sw' + +const getDsmPathwayHash = (clusterIdAvailable, isProducer, parentHash) => { + let edgeTags + if (isProducer) { + edgeTags = ['direction:out', 'topic:' + testTopic, 'type:kafka'] + } else { + edgeTags = ['direction:in', 'group:test-group', 'topic:' + testTopic, 'type:kafka'] + } + + if 
(clusterIdAvailable) { + edgeTags.push(`kafka_cluster_id:${testKafkaClusterId}`) + } + edgeTags.sort() + return computePathwayHash('test', 'tester', edgeTags, parentHash) +} describe('Plugin', () => { describe('kafkajs', function () { @@ -38,6 +42,16 @@ describe('Plugin', () => { let kafka let tracer let Kafka + let clusterIdAvailable + let expectedProducerHash + let expectedConsumerHash + + before(() => { + clusterIdAvailable = semver.intersects(version, '>=1.13') + expectedProducerHash = getDsmPathwayHash(clusterIdAvailable, true, ENTRY_PARENT_HASH) + expectedConsumerHash = getDsmPathwayHash(clusterIdAvailable, false, expectedProducerHash) + }) + describe('without configuration', () => { const messages = [{ key: 'key1', value: 'test2' }] @@ -56,14 +70,17 @@ describe('Plugin', () => { describe('producer', () => { it('should be instrumented', async () => { + const meta = { + 'span.kind': 'producer', + component: 'kafkajs', + 'pathway.hash': expectedProducerHash.readBigUInt64BE(0).toString() + } + if (clusterIdAvailable) meta['kafka.cluster_id'] = testKafkaClusterId + const expectedSpanPromise = expectSpanWithDefaults({ name: expectedSchema.send.opName, service: expectedSchema.send.serviceName, - meta: { - 'span.kind': 'producer', - component: 'kafkajs', - 'pathway.hash': expectedProducerHash.readBigUInt64BE(0).toString() - }, + meta, metrics: { 'kafka.batch_size': messages.length }, @@ -353,6 +370,12 @@ describe('Plugin', () => { await consumer.subscribe({ topic: testTopic }) }) + before(() => { + clusterIdAvailable = semver.intersects(version, '>=1.13') + expectedProducerHash = getDsmPathwayHash(clusterIdAvailable, true, ENTRY_PARENT_HASH) + expectedConsumerHash = getDsmPathwayHash(clusterIdAvailable, false, expectedProducerHash) + }) + afterEach(async () => { await consumer.disconnect() }) @@ -368,19 +391,6 @@ describe('Plugin', () => { setDataStreamsContextSpy.restore() }) - const expectedProducerHash = computePathwayHash( - 'test', - 'tester', - 
['direction:out', 'topic:' + testTopic, 'type:kafka'], - ENTRY_PARENT_HASH - ) - const expectedConsumerHash = computePathwayHash( - 'test', - 'tester', - ['direction:in', 'group:test-group', 'topic:' + testTopic, 'type:kafka'], - expectedProducerHash - ) - it('Should set a checkpoint on produce', async () => { const messages = [{ key: 'consumerDSM1', value: 'test2' }] await sendMessages(kafka, testTopic, messages) @@ -476,9 +486,9 @@ describe('Plugin', () => { } /** - * No choice but to reinitialize everything, because the only way to flush eachMessage - * calls is to disconnect. - */ + * No choice but to reinitialize everything, because the only way to flush eachMessage + * calls is to disconnect. + */ consumer.connect() await sendMessages(kafka, testTopic, messages) await consumer.run({ eachMessage: async () => {}, autoCommit: false }) diff --git a/packages/datadog-plugin-mocha/src/index.js b/packages/datadog-plugin-mocha/src/index.js index 30f6e88a9fc..0513a4a95d6 100644 --- a/packages/datadog-plugin-mocha/src/index.js +++ b/packages/datadog-plugin-mocha/src/index.js @@ -242,7 +242,7 @@ class MochaPlugin extends CiPlugin { } }) - this.addSub('ci:mocha:test:retry', (isFirstAttempt) => { + this.addSub('ci:mocha:test:retry', ({ isFirstAttempt, err }) => { const store = storage.getStore() const span = store?.span if (span) { @@ -250,6 +250,9 @@ class MochaPlugin extends CiPlugin { if (!isFirstAttempt) { span.setTag(TEST_IS_RETRY, 'true') } + if (err) { + span.setTag('error', err) + } const spanTags = span.context()._tags this.telemetry.ciVisEvent( diff --git a/packages/datadog-plugin-openai/src/index.js b/packages/datadog-plugin-openai/src/index.js index f96b44543d2..c76f7333910 100644 --- a/packages/datadog-plugin-openai/src/index.js +++ b/packages/datadog-plugin-openai/src/index.js @@ -1,1023 +1,17 @@ 'use strict' -const path = require('path') +const CompositePlugin = require('../../dd-trace/src/plugins/composite') +const OpenAiTracingPlugin = require('./tracing') 
+const OpenAiLLMObsPlugin = require('../../dd-trace/src/llmobs/plugins/openai') -const TracingPlugin = require('../../dd-trace/src/plugins/tracing') -const { storage } = require('../../datadog-core') -const services = require('./services') -const Sampler = require('../../dd-trace/src/sampler') -const { MEASURED } = require('../../../ext/tags') -const { estimateTokens } = require('./token-estimator') - -// String#replaceAll unavailable on Node.js@v14 (dd-trace@<=v3) -const RE_NEWLINE = /\n/g -const RE_TAB = /\t/g - -// TODO: In the future we should refactor config.js to make it requirable -let MAX_TEXT_LEN = 128 - -function safeRequire (path) { - try { - // eslint-disable-next-line import/no-extraneous-dependencies - return require(path) - } catch { - return null - } -} - -const encodingForModel = safeRequire('tiktoken')?.encoding_for_model - -class OpenApiPlugin extends TracingPlugin { +class OpenAiPlugin extends CompositePlugin { static get id () { return 'openai' } - static get operation () { return 'request' } - static get system () { return 'openai' } - static get prefix () { - return 'tracing:apm:openai:request' - } - - constructor (...args) { - super(...args) - - const { metrics, logger } = services.init(this._tracerConfig) - this.metrics = metrics - this.logger = logger - - this.sampler = new Sampler(0.1) // default 10% log sampling - - // hoist the max length env var to avoid making all of these functions a class method - if (this._tracerConfig) { - MAX_TEXT_LEN = this._tracerConfig.openaiSpanCharLimit - } - } - - configure (config) { - if (config.enabled === false) { - services.shutdown() - } - - super.configure(config) - } - - bindStart (ctx) { - const { methodName, args, basePath, apiKey } = ctx - const payload = normalizeRequestPayload(methodName, args) - const store = storage.getStore() || {} - - const span = this.startSpan('openai.request', { - service: this.config.service, - resource: methodName, - type: 'openai', - kind: 'client', - meta: { - 
[MEASURED]: 1, - // Data that is always available with a request - 'openai.user.api_key': truncateApiKey(apiKey), - 'openai.api_base': basePath, - // The openai.api_type (openai|azure) is present in Python but not in Node.js - // Add support once https://github.com/openai/openai-node/issues/53 is closed - - // Data that is common across many requests - 'openai.request.best_of': payload.best_of, - 'openai.request.echo': payload.echo, - 'openai.request.logprobs': payload.logprobs, - 'openai.request.max_tokens': payload.max_tokens, - 'openai.request.model': payload.model, // vague model - 'openai.request.n': payload.n, - 'openai.request.presence_penalty': payload.presence_penalty, - 'openai.request.frequency_penalty': payload.frequency_penalty, - 'openai.request.stop': payload.stop, - 'openai.request.suffix': payload.suffix, - 'openai.request.temperature': payload.temperature, - 'openai.request.top_p': payload.top_p, - 'openai.request.user': payload.user, - 'openai.request.file_id': payload.file_id // deleteFile, retrieveFile, downloadFile - } - }, false) - - const openaiStore = Object.create(null) - - const tags = {} // The remaining tags are added one at a time - - // createChatCompletion, createCompletion, createImage, createImageEdit, createTranscription, createTranslation - if (payload.prompt) { - const prompt = payload.prompt - openaiStore.prompt = prompt - if (typeof prompt === 'string' || (Array.isArray(prompt) && typeof prompt[0] === 'number')) { - // This is a single prompt, either String or [Number] - tags['openai.request.prompt'] = normalizeStringOrTokenArray(prompt, true) - } else if (Array.isArray(prompt)) { - // This is multiple prompts, either [String] or [[Number]] - for (let i = 0; i < prompt.length; i++) { - tags[`openai.request.prompt.${i}`] = normalizeStringOrTokenArray(prompt[i], true) - } - } - } - - // createEdit, createEmbedding, createModeration - if (payload.input) { - const normalized = normalizeStringOrTokenArray(payload.input, false) - 
tags['openai.request.input'] = truncateText(normalized) - openaiStore.input = normalized - } - - // createChatCompletion, createCompletion - if (payload.logit_bias !== null && typeof payload.logit_bias === 'object') { - for (const [tokenId, bias] of Object.entries(payload.logit_bias)) { - tags[`openai.request.logit_bias.${tokenId}`] = bias - } - } - - if (payload.stream) { - tags['openai.request.stream'] = payload.stream - } - - switch (methodName) { - case 'createFineTune': - case 'fine_tuning.jobs.create': - case 'fine-tune.create': - createFineTuneRequestExtraction(tags, payload) - break - - case 'createImage': - case 'images.generate': - case 'createImageEdit': - case 'images.edit': - case 'createImageVariation': - case 'images.createVariation': - commonCreateImageRequestExtraction(tags, payload, openaiStore) - break - - case 'createChatCompletion': - case 'chat.completions.create': - createChatCompletionRequestExtraction(tags, payload, openaiStore) - break - - case 'createFile': - case 'files.create': - case 'retrieveFile': - case 'files.retrieve': - commonFileRequestExtraction(tags, payload) - break - - case 'createTranscription': - case 'audio.transcriptions.create': - case 'createTranslation': - case 'audio.translations.create': - commonCreateAudioRequestExtraction(tags, payload, openaiStore) - break - - case 'retrieveModel': - case 'models.retrieve': - retrieveModelRequestExtraction(tags, payload) - break - - case 'listFineTuneEvents': - case 'fine_tuning.jobs.listEvents': - case 'fine-tune.listEvents': - case 'retrieveFineTune': - case 'fine_tuning.jobs.retrieve': - case 'fine-tune.retrieve': - case 'deleteModel': - case 'models.del': - case 'cancelFineTune': - case 'fine_tuning.jobs.cancel': - case 'fine-tune.cancel': - commonLookupFineTuneRequestExtraction(tags, payload) - break - - case 'createEdit': - case 'edits.create': - createEditRequestExtraction(tags, payload, openaiStore) - break - } - - span.addTags(tags) - - ctx.currentStore = { ...store, 
span, openai: openaiStore } - - return ctx.currentStore - } - - asyncEnd (ctx) { - const { result } = ctx - const store = ctx.currentStore - - const span = store?.span - if (!span) return - - const error = !!span.context()._tags.error - - let headers, body, method, path - if (!error) { - headers = result.headers - body = result.data - method = result.request.method - path = result.request.path - } - - if (!error && headers?.constructor.name === 'Headers') { - headers = Object.fromEntries(headers) - } - const methodName = span._spanContext._tags['resource.name'] - - body = coerceResponseBody(body, methodName) - - const openaiStore = store.openai - - if (!error && (path?.startsWith('https://') || path?.startsWith('http://'))) { - // basic checking for if the path was set as a full URL - // not using a full regex as it will likely be "https://api.openai.com/..." - path = new URL(path).pathname - } - const endpoint = lookupOperationEndpoint(methodName, path) - - const tags = error - ? {} - : { - 'openai.request.endpoint': endpoint, - 'openai.request.method': method.toUpperCase(), - - 'openai.organization.id': body.organization_id, // only available in fine-tunes endpoints - 'openai.organization.name': headers['openai-organization'], - - 'openai.response.model': headers['openai-model'] || body.model, // specific model, often undefined - 'openai.response.id': body.id, // common creation value, numeric epoch - 'openai.response.deleted': body.deleted, // common boolean field in delete responses - - // The OpenAI API appears to use both created and created_at in different places - // Here we're conciously choosing to surface this inconsistency instead of normalizing - 'openai.response.created': body.created, - 'openai.response.created_at': body.created_at - } - - responseDataExtractionByMethod(methodName, tags, body, openaiStore) - span.addTags(tags) - - span.finish() - this.sendLog(methodName, span, tags, openaiStore, error) - this.sendMetrics(headers, body, endpoint, 
span._duration, error, tags) - } - - sendMetrics (headers, body, endpoint, duration, error, spanTags) { - const tags = [`error:${Number(!!error)}`] - if (error) { - this.metrics.increment('openai.request.error', 1, tags) - } else { - tags.push(`org:${headers['openai-organization']}`) - tags.push(`endpoint:${endpoint}`) // just "/v1/models", no method - tags.push(`model:${headers['openai-model'] || body.model}`) - } - - this.metrics.distribution('openai.request.duration', duration * 1000, tags) - - const promptTokens = spanTags['openai.response.usage.prompt_tokens'] - const promptTokensEstimated = spanTags['openai.response.usage.prompt_tokens_estimated'] - - const completionTokens = spanTags['openai.response.usage.completion_tokens'] - const completionTokensEstimated = spanTags['openai.response.usage.completion_tokens_estimated'] - - const totalTokens = spanTags['openai.response.usage.total_tokens'] - - if (!error) { - if (promptTokens != null) { - if (promptTokensEstimated) { - this.metrics.distribution( - 'openai.tokens.prompt', promptTokens, [...tags, 'openai.estimated:true']) - } else { - this.metrics.distribution('openai.tokens.prompt', promptTokens, tags) - } - } - - if (completionTokens != null) { - if (completionTokensEstimated) { - this.metrics.distribution( - 'openai.tokens.completion', completionTokens, [...tags, 'openai.estimated:true']) - } else { - this.metrics.distribution('openai.tokens.completion', completionTokens, tags) - } - } - - if (totalTokens != null) { - if (promptTokensEstimated || completionTokensEstimated) { - this.metrics.distribution( - 'openai.tokens.total', totalTokens, [...tags, 'openai.estimated:true']) - } else { - this.metrics.distribution('openai.tokens.total', totalTokens, tags) - } - } - } - - if (headers) { - if (headers['x-ratelimit-limit-requests']) { - this.metrics.gauge('openai.ratelimit.requests', Number(headers['x-ratelimit-limit-requests']), tags) - } - - if (headers['x-ratelimit-remaining-requests']) { - 
this.metrics.gauge( - 'openai.ratelimit.remaining.requests', Number(headers['x-ratelimit-remaining-requests']), tags - ) - } - - if (headers['x-ratelimit-limit-tokens']) { - this.metrics.gauge('openai.ratelimit.tokens', Number(headers['x-ratelimit-limit-tokens']), tags) - } - - if (headers['x-ratelimit-remaining-tokens']) { - this.metrics.gauge('openai.ratelimit.remaining.tokens', Number(headers['x-ratelimit-remaining-tokens']), tags) - } - } - } - - sendLog (methodName, span, tags, openaiStore, error) { - if (!openaiStore) return - if (!Object.keys(openaiStore).length) return - if (!this.sampler.isSampled()) return - - const log = { - status: error ? 'error' : 'info', - message: `sampled ${methodName}`, - ...openaiStore - } - - this.logger.log(log, span, tags) - } -} - -function countPromptTokens (methodName, payload, model) { - let promptTokens = 0 - let promptEstimated = false - if (methodName === 'chat.completions.create') { - const messages = payload.messages - for (const message of messages) { - const content = message.content - if (typeof content === 'string') { - const { tokens, estimated } = countTokens(content, model) - promptTokens += tokens - promptEstimated = estimated - } else if (Array.isArray(content)) { - for (const c of content) { - if (c.type === 'text') { - const { tokens, estimated } = countTokens(c.text, model) - promptTokens += tokens - promptEstimated = estimated - } - // unsupported token computation for image_url - // as even though URL is a string, its true token count - // is based on the image itself, something onerous to do client-side - } - } - } - } else if (methodName === 'completions.create') { - let prompt = payload.prompt - if (!Array.isArray(prompt)) prompt = [prompt] - - for (const p of prompt) { - const { tokens, estimated } = countTokens(p, model) - promptTokens += tokens - promptEstimated = estimated - } - } - - return { promptTokens, promptEstimated } -} - -function countCompletionTokens (body, model) { - let 
completionTokens = 0 - let completionEstimated = false - if (body?.choices) { - for (const choice of body.choices) { - const message = choice.message || choice.delta // delta for streamed responses - const text = choice.text - const content = text || message?.content - - const { tokens, estimated } = countTokens(content, model) - completionTokens += tokens - completionEstimated = estimated - } - } - - return { completionTokens, completionEstimated } -} - -function countTokens (content, model) { - if (encodingForModel) { - try { - // try using tiktoken if it was available - const encoder = encodingForModel(model) - const tokens = encoder.encode(content).length - encoder.free() - return { tokens, estimated: false } - } catch { - // possible errors from tiktoken: - // * model not available for token counts - // * issue encoding content - } - } - - return { - tokens: estimateTokens(content), - estimated: true - } -} - -function createEditRequestExtraction (tags, payload, openaiStore) { - const instruction = payload.instruction - tags['openai.request.instruction'] = instruction - openaiStore.instruction = instruction -} - -function retrieveModelRequestExtraction (tags, payload) { - tags['openai.request.id'] = payload.id -} - -function createChatCompletionRequestExtraction (tags, payload, openaiStore) { - const messages = payload.messages - if (!defensiveArrayLength(messages)) return - - openaiStore.messages = payload.messages - for (let i = 0; i < payload.messages.length; i++) { - const message = payload.messages[i] - tagChatCompletionRequestContent(message.content, i, tags) - tags[`openai.request.messages.${i}.role`] = message.role - tags[`openai.request.messages.${i}.name`] = message.name - tags[`openai.request.messages.${i}.finish_reason`] = message.finish_reason - } -} - -function commonCreateImageRequestExtraction (tags, payload, openaiStore) { - // createImageEdit, createImageVariation - const img = payload.file || payload.image - if (img !== null && typeof img 
=== 'object' && img.path) { - const file = path.basename(img.path) - tags['openai.request.image'] = file - openaiStore.file = file - } - - // createImageEdit - if (payload.mask !== null && typeof payload.mask === 'object' && payload.mask.path) { - const mask = path.basename(payload.mask.path) - tags['openai.request.mask'] = mask - openaiStore.mask = mask - } - - tags['openai.request.size'] = payload.size - tags['openai.request.response_format'] = payload.response_format - tags['openai.request.language'] = payload.language -} - -function responseDataExtractionByMethod (methodName, tags, body, openaiStore) { - switch (methodName) { - case 'createModeration': - case 'moderations.create': - createModerationResponseExtraction(tags, body) - break - - case 'createCompletion': - case 'completions.create': - case 'createChatCompletion': - case 'chat.completions.create': - case 'createEdit': - case 'edits.create': - commonCreateResponseExtraction(tags, body, openaiStore, methodName) - break - - case 'listFiles': - case 'files.list': - case 'listFineTunes': - case 'fine_tuning.jobs.list': - case 'fine-tune.list': - case 'listFineTuneEvents': - case 'fine_tuning.jobs.listEvents': - case 'fine-tune.listEvents': - commonListCountResponseExtraction(tags, body) - break - - case 'createEmbedding': - case 'embeddings.create': - createEmbeddingResponseExtraction(tags, body, openaiStore) - break - - case 'createFile': - case 'files.create': - case 'retrieveFile': - case 'files.retrieve': - createRetrieveFileResponseExtraction(tags, body) - break - - case 'deleteFile': - case 'files.del': - deleteFileResponseExtraction(tags, body) - break - - case 'downloadFile': - case 'files.retrieveContent': - case 'files.content': - downloadFileResponseExtraction(tags, body) - break - - case 'createFineTune': - case 'fine_tuning.jobs.create': - case 'fine-tune.create': - case 'retrieveFineTune': - case 'fine_tuning.jobs.retrieve': - case 'fine-tune.retrieve': - case 'cancelFineTune': - case 
'fine_tuning.jobs.cancel': - case 'fine-tune.cancel': - commonFineTuneResponseExtraction(tags, body) - break - - case 'createTranscription': - case 'audio.transcriptions.create': - case 'createTranslation': - case 'audio.translations.create': - createAudioResponseExtraction(tags, body) - break - - case 'createImage': - case 'images.generate': - case 'createImageEdit': - case 'images.edit': - case 'createImageVariation': - case 'images.createVariation': - commonImageResponseExtraction(tags, body) - break - - case 'listModels': - case 'models.list': - listModelsResponseExtraction(tags, body) - break - - case 'retrieveModel': - case 'models.retrieve': - retrieveModelResponseExtraction(tags, body) - break - } -} - -function retrieveModelResponseExtraction (tags, body) { - tags['openai.response.owned_by'] = body.owned_by - tags['openai.response.parent'] = body.parent - tags['openai.response.root'] = body.root - - if (!body.permission) return - - tags['openai.response.permission.id'] = body.permission[0].id - tags['openai.response.permission.created'] = body.permission[0].created - tags['openai.response.permission.allow_create_engine'] = body.permission[0].allow_create_engine - tags['openai.response.permission.allow_sampling'] = body.permission[0].allow_sampling - tags['openai.response.permission.allow_logprobs'] = body.permission[0].allow_logprobs - tags['openai.response.permission.allow_search_indices'] = body.permission[0].allow_search_indices - tags['openai.response.permission.allow_view'] = body.permission[0].allow_view - tags['openai.response.permission.allow_fine_tuning'] = body.permission[0].allow_fine_tuning - tags['openai.response.permission.organization'] = body.permission[0].organization - tags['openai.response.permission.group'] = body.permission[0].group - tags['openai.response.permission.is_blocking'] = body.permission[0].is_blocking -} - -function commonLookupFineTuneRequestExtraction (tags, body) { - tags['openai.request.fine_tune_id'] = 
body.fine_tune_id - tags['openai.request.stream'] = !!body.stream // listFineTuneEvents -} - -function listModelsResponseExtraction (tags, body) { - if (!body.data) return - - tags['openai.response.count'] = body.data.length -} - -function commonImageResponseExtraction (tags, body) { - if (!body.data) return - - tags['openai.response.images_count'] = body.data.length - - for (let i = 0; i < body.data.length; i++) { - const image = body.data[i] - // exactly one of these two options is provided - tags[`openai.response.images.${i}.url`] = truncateText(image.url) - tags[`openai.response.images.${i}.b64_json`] = image.b64_json && 'returned' - } -} - -function createAudioResponseExtraction (tags, body) { - tags['openai.response.text'] = body.text - tags['openai.response.language'] = body.language - tags['openai.response.duration'] = body.duration - tags['openai.response.segments_count'] = defensiveArrayLength(body.segments) -} - -function createFineTuneRequestExtraction (tags, body) { - tags['openai.request.training_file'] = body.training_file - tags['openai.request.validation_file'] = body.validation_file - tags['openai.request.n_epochs'] = body.n_epochs - tags['openai.request.batch_size'] = body.batch_size - tags['openai.request.learning_rate_multiplier'] = body.learning_rate_multiplier - tags['openai.request.prompt_loss_weight'] = body.prompt_loss_weight - tags['openai.request.compute_classification_metrics'] = body.compute_classification_metrics - tags['openai.request.classification_n_classes'] = body.classification_n_classes - tags['openai.request.classification_positive_class'] = body.classification_positive_class - tags['openai.request.classification_betas_count'] = defensiveArrayLength(body.classification_betas) -} - -function commonFineTuneResponseExtraction (tags, body) { - tags['openai.response.events_count'] = defensiveArrayLength(body.events) - tags['openai.response.fine_tuned_model'] = body.fine_tuned_model - - const hyperparams = body.hyperparams || 
body.hyperparameters - const hyperparamsKey = body.hyperparams ? 'hyperparams' : 'hyperparameters' - - if (hyperparams) { - tags[`openai.response.${hyperparamsKey}.n_epochs`] = hyperparams.n_epochs - tags[`openai.response.${hyperparamsKey}.batch_size`] = hyperparams.batch_size - tags[`openai.response.${hyperparamsKey}.prompt_loss_weight`] = hyperparams.prompt_loss_weight - tags[`openai.response.${hyperparamsKey}.learning_rate_multiplier`] = hyperparams.learning_rate_multiplier - } - tags['openai.response.training_files_count'] = defensiveArrayLength(body.training_files || body.training_file) - tags['openai.response.result_files_count'] = defensiveArrayLength(body.result_files) - tags['openai.response.validation_files_count'] = defensiveArrayLength(body.validation_files || body.validation_file) - tags['openai.response.updated_at'] = body.updated_at - tags['openai.response.status'] = body.status -} - -// the OpenAI package appears to stream the content download then provide it all as a singular string -function downloadFileResponseExtraction (tags, body) { - if (!body.file) return - tags['openai.response.total_bytes'] = body.file.length -} - -function deleteFileResponseExtraction (tags, body) { - tags['openai.response.id'] = body.id -} - -function commonCreateAudioRequestExtraction (tags, body, openaiStore) { - tags['openai.request.response_format'] = body.response_format - tags['openai.request.language'] = body.language - - if (body.file !== null && typeof body.file === 'object' && body.file.path) { - const filename = path.basename(body.file.path) - tags['openai.request.filename'] = filename - openaiStore.file = filename - } -} - -function commonFileRequestExtraction (tags, body) { - tags['openai.request.purpose'] = body.purpose - - // User can provider either exact file contents or a file read stream - // With the stream we extract the filepath - // This is a best effort attempt to extract the filename during the request - if (body.file !== null && typeof body.file 
=== 'object' && body.file.path) { - tags['openai.request.filename'] = path.basename(body.file.path) - } -} - -function createRetrieveFileResponseExtraction (tags, body) { - tags['openai.response.filename'] = body.filename - tags['openai.response.purpose'] = body.purpose - tags['openai.response.bytes'] = body.bytes - tags['openai.response.status'] = body.status - tags['openai.response.status_details'] = body.status_details -} - -function createEmbeddingResponseExtraction (tags, body, openaiStore) { - usageExtraction(tags, body, openaiStore) - - if (!body.data) return - - tags['openai.response.embeddings_count'] = body.data.length - for (let i = 0; i < body.data.length; i++) { - tags[`openai.response.embedding.${i}.embedding_length`] = body.data[i].embedding.length - } -} - -function commonListCountResponseExtraction (tags, body) { - if (!body.data) return - tags['openai.response.count'] = body.data.length -} - -// TODO: Is there ever more than one entry in body.results? -function createModerationResponseExtraction (tags, body) { - tags['openai.response.id'] = body.id - // tags[`openai.response.model`] = body.model // redundant, already extracted globally - - if (!body.results) return - - tags['openai.response.flagged'] = body.results[0].flagged - - for (const [category, match] of Object.entries(body.results[0].categories)) { - tags[`openai.response.categories.${category}`] = match - } - - for (const [category, score] of Object.entries(body.results[0].category_scores)) { - tags[`openai.response.category_scores.${category}`] = score - } -} - -// createCompletion, createChatCompletion, createEdit -function commonCreateResponseExtraction (tags, body, openaiStore, methodName) { - usageExtraction(tags, body, methodName, openaiStore) - - if (!body.choices) return - - tags['openai.response.choices_count'] = body.choices.length - - openaiStore.choices = body.choices - - for (let choiceIdx = 0; choiceIdx < body.choices.length; choiceIdx++) { - const choice = 
body.choices[choiceIdx] - - // logprobs can be null and we still want to tag it as 'returned' even when set to 'null' - const specifiesLogProb = Object.keys(choice).indexOf('logprobs') !== -1 - - tags[`openai.response.choices.${choiceIdx}.finish_reason`] = choice.finish_reason - tags[`openai.response.choices.${choiceIdx}.logprobs`] = specifiesLogProb ? 'returned' : undefined - tags[`openai.response.choices.${choiceIdx}.text`] = truncateText(choice.text) - - // createChatCompletion only - const message = choice.message || choice.delta // delta for streamed responses - if (message) { - tags[`openai.response.choices.${choiceIdx}.message.role`] = message.role - tags[`openai.response.choices.${choiceIdx}.message.content`] = truncateText(message.content) - tags[`openai.response.choices.${choiceIdx}.message.name`] = truncateText(message.name) - if (message.tool_calls) { - const toolCalls = message.tool_calls - for (let toolIdx = 0; toolIdx < toolCalls.length; toolIdx++) { - tags[`openai.response.choices.${choiceIdx}.message.tool_calls.${toolIdx}.function.name`] = - toolCalls[toolIdx].function.name - tags[`openai.response.choices.${choiceIdx}.message.tool_calls.${toolIdx}.function.arguments`] = - toolCalls[toolIdx].function.arguments - tags[`openai.response.choices.${choiceIdx}.message.tool_calls.${toolIdx}.id`] = - toolCalls[toolIdx].id - } - } - } - } -} - -// createCompletion, createChatCompletion, createEdit, createEmbedding -function usageExtraction (tags, body, methodName, openaiStore) { - let promptTokens = 0 - let completionTokens = 0 - let totalTokens = 0 - if (body && body.usage) { - promptTokens = body.usage.prompt_tokens - completionTokens = body.usage.completion_tokens - totalTokens = body.usage.total_tokens - } else if (body.model && ['chat.completions.create', 'completions.create'].includes(methodName)) { - // estimate tokens based on method name for completions and chat completions - const { model } = body - let promptEstimated = false - let 
completionEstimated = false - - // prompt tokens - const payload = openaiStore - const promptTokensCount = countPromptTokens(methodName, payload, model) - promptTokens = promptTokensCount.promptTokens - promptEstimated = promptTokensCount.promptEstimated - - // completion tokens - const completionTokensCount = countCompletionTokens(body, model) - completionTokens = completionTokensCount.completionTokens - completionEstimated = completionTokensCount.completionEstimated - - // total tokens - totalTokens = promptTokens + completionTokens - if (promptEstimated) tags['openai.response.usage.prompt_tokens_estimated'] = true - if (completionEstimated) tags['openai.response.usage.completion_tokens_estimated'] = true - } - - if (promptTokens != null) tags['openai.response.usage.prompt_tokens'] = promptTokens - if (completionTokens != null) tags['openai.response.usage.completion_tokens'] = completionTokens - if (totalTokens != null) tags['openai.response.usage.total_tokens'] = totalTokens -} - -function truncateApiKey (apiKey) { - return apiKey && `sk-...${apiKey.substr(apiKey.length - 4)}` -} - -/** - * for cleaning up prompt and response - */ -function truncateText (text) { - if (!text) return - if (typeof text !== 'string' || !text || (typeof text === 'string' && text.length === 0)) return - - text = text - .replace(RE_NEWLINE, '\\n') - .replace(RE_TAB, '\\t') - - if (text.length > MAX_TEXT_LEN) { - return text.substring(0, MAX_TEXT_LEN) + '...' 
- } - - return text -} - -function tagChatCompletionRequestContent (contents, messageIdx, tags) { - if (typeof contents === 'string') { - tags[`openai.request.messages.${messageIdx}.content`] = contents - } else if (Array.isArray(contents)) { - // content can also be an array of objects - // which represent text input or image url - for (const contentIdx in contents) { - const content = contents[contentIdx] - const type = content.type - tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.type`] = content.type - if (type === 'text') { - tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.text`] = truncateText(content.text) - } else if (type === 'image_url') { - tags[`openai.request.messages.${messageIdx}.content.${contentIdx}.image_url.url`] = - truncateText(content.image_url.url) - } - // unsupported type otherwise, won't be tagged - } - } - // unsupported type otherwise, won't be tagged -} - -// The server almost always responds with JSON -function coerceResponseBody (body, methodName) { - switch (methodName) { - case 'downloadFile': - case 'files.retrieveContent': - case 'files.content': - return { file: body } - } - - const type = typeof body - if (type === 'string') { - try { - return JSON.parse(body) - } catch { - return body + static get plugins () { + return { + llmobs: OpenAiLLMObsPlugin, + tracing: OpenAiTracingPlugin } - } else if (type === 'object') { - return body - } else { - return {} } } -// This method is used to replace a dynamic URL segment with an asterisk -function lookupOperationEndpoint (operationId, url) { - switch (operationId) { - case 'deleteModel': - case 'models.del': - case 'retrieveModel': - case 'models.retrieve': - return '/v1/models/*' - - case 'deleteFile': - case 'files.del': - case 'retrieveFile': - case 'files.retrieve': - return '/v1/files/*' - - case 'downloadFile': - case 'files.retrieveContent': - case 'files.content': - return '/v1/files/*/content' - - case 'retrieveFineTune': - case 
'fine-tune.retrieve': - return '/v1/fine-tunes/*' - case 'fine_tuning.jobs.retrieve': - return '/v1/fine_tuning/jobs/*' - - case 'listFineTuneEvents': - case 'fine-tune.listEvents': - return '/v1/fine-tunes/*/events' - case 'fine_tuning.jobs.listEvents': - return '/v1/fine_tuning/jobs/*/events' - - case 'cancelFineTune': - case 'fine-tune.cancel': - return '/v1/fine-tunes/*/cancel' - case 'fine_tuning.jobs.cancel': - return '/v1/fine_tuning/jobs/*/cancel' - } - - return url -} - -/** - * This function essentially normalizes the OpenAI method interface. Many methods accept - * a single object argument. The remaining ones take individual arguments. This function - * turns the individual arguments into an object to make extracting properties consistent. - */ -function normalizeRequestPayload (methodName, args) { - switch (methodName) { - case 'listModels': - case 'models.list': - case 'listFiles': - case 'files.list': - case 'listFineTunes': - case 'fine_tuning.jobs.list': - case 'fine-tune.list': - // no argument - return {} - - case 'retrieveModel': - case 'models.retrieve': - return { id: args[0] } - - case 'createFile': - return { - file: args[0], - purpose: args[1] - } - - case 'deleteFile': - case 'files.del': - case 'retrieveFile': - case 'files.retrieve': - case 'downloadFile': - case 'files.retrieveContent': - case 'files.content': - return { file_id: args[0] } - - case 'listFineTuneEvents': - case 'fine_tuning.jobs.listEvents': - case 'fine-tune.listEvents': - return { - fine_tune_id: args[0], - stream: args[1] // undocumented - } - - case 'retrieveFineTune': - case 'fine_tuning.jobs.retrieve': - case 'fine-tune.retrieve': - case 'deleteModel': - case 'models.del': - case 'cancelFineTune': - case 'fine_tuning.jobs.cancel': - case 'fine-tune.cancel': - return { fine_tune_id: args[0] } - - case 'createImageEdit': - return { - file: args[0], - prompt: args[1], // Note: order of prompt/mask in Node.js lib differs from public docs - mask: args[2], - n: args[3], - 
size: args[4], - response_format: args[5], - user: args[6] - } - - case 'createImageVariation': - return { - file: args[0], - n: args[1], - size: args[2], - response_format: args[3], - user: args[4] - } - - case 'createTranscription': - case 'createTranslation': - return { - file: args[0], - model: args[1], - prompt: args[2], - response_format: args[3], - temperature: args[4], - language: args[5] // only used for createTranscription - } - } - - // Remaining OpenAI methods take a single object argument - return args[0] -} - -/** - * Converts an array of tokens to a string - * If input is already a string it's returned - * In either case the value is truncated - - * It's intentional that the array be truncated arbitrarily, e.g. "[999, 888, 77..." - - * "foo" -> "foo" - * [1,2,3] -> "[1, 2, 3]" - */ -function normalizeStringOrTokenArray (input, truncate) { - const normalized = Array.isArray(input) - ? `[${input.join(', ')}]` // "[1, 2, 999]" - : input // "foo" - return truncate ? truncateText(normalized) : normalized -} - -function defensiveArrayLength (maybeArray) { - if (maybeArray) { - if (Array.isArray(maybeArray)) { - return maybeArray.length - } else { - // case of a singular item (ie body.training_file vs body.training_files) - return 1 - } - } - - return undefined -} - -module.exports = OpenApiPlugin +module.exports = OpenAiPlugin diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js new file mode 100644 index 00000000000..a92f66a6df6 --- /dev/null +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -0,0 +1,1023 @@ +'use strict' + +const path = require('path') + +const TracingPlugin = require('../../dd-trace/src/plugins/tracing') +const { storage } = require('../../datadog-core') +const services = require('./services') +const Sampler = require('../../dd-trace/src/sampler') +const { MEASURED } = require('../../../ext/tags') +const { estimateTokens } = require('./token-estimator') + +// String#replaceAll 
unavailable on Node.js@v14 (dd-trace@<=v3) +const RE_NEWLINE = /\n/g +const RE_TAB = /\t/g + +// TODO: In the future we should refactor config.js to make it requirable +let MAX_TEXT_LEN = 128 + +function safeRequire (path) { + try { + // eslint-disable-next-line import/no-extraneous-dependencies + return require(path) + } catch { + return null + } +} + +const encodingForModel = safeRequire('tiktoken')?.encoding_for_model + +class OpenAiTracingPlugin extends TracingPlugin { + static get id () { return 'openai' } + static get operation () { return 'request' } + static get system () { return 'openai' } + static get prefix () { + return 'tracing:apm:openai:request' + } + + constructor (...args) { + super(...args) + + const { metrics, logger } = services.init(this._tracerConfig) + this.metrics = metrics + this.logger = logger + + this.sampler = new Sampler(0.1) // default 10% log sampling + + // hoist the max length env var to avoid making all of these functions a class method + if (this._tracerConfig) { + MAX_TEXT_LEN = this._tracerConfig.openaiSpanCharLimit + } + } + + configure (config) { + if (config.enabled === false) { + services.shutdown() + } + + super.configure(config) + } + + bindStart (ctx) { + const { methodName, args, basePath, apiKey } = ctx + const payload = normalizeRequestPayload(methodName, args) + const store = storage.getStore() || {} + + const span = this.startSpan('openai.request', { + service: this.config.service, + resource: methodName, + type: 'openai', + kind: 'client', + meta: { + [MEASURED]: 1, + // Data that is always available with a request + 'openai.user.api_key': truncateApiKey(apiKey), + 'openai.api_base': basePath, + // The openai.api_type (openai|azure) is present in Python but not in Node.js + // Add support once https://github.com/openai/openai-node/issues/53 is closed + + // Data that is common across many requests + 'openai.request.best_of': payload.best_of, + 'openai.request.echo': payload.echo, + 'openai.request.logprobs': 
payload.logprobs, + 'openai.request.max_tokens': payload.max_tokens, + 'openai.request.model': payload.model, // vague model + 'openai.request.n': payload.n, + 'openai.request.presence_penalty': payload.presence_penalty, + 'openai.request.frequency_penalty': payload.frequency_penalty, + 'openai.request.stop': payload.stop, + 'openai.request.suffix': payload.suffix, + 'openai.request.temperature': payload.temperature, + 'openai.request.top_p': payload.top_p, + 'openai.request.user': payload.user, + 'openai.request.file_id': payload.file_id // deleteFile, retrieveFile, downloadFile + } + }, false) + + const openaiStore = Object.create(null) + + const tags = {} // The remaining tags are added one at a time + + // createChatCompletion, createCompletion, createImage, createImageEdit, createTranscription, createTranslation + if (payload.prompt) { + const prompt = payload.prompt + openaiStore.prompt = prompt + if (typeof prompt === 'string' || (Array.isArray(prompt) && typeof prompt[0] === 'number')) { + // This is a single prompt, either String or [Number] + tags['openai.request.prompt'] = normalizeStringOrTokenArray(prompt, true) + } else if (Array.isArray(prompt)) { + // This is multiple prompts, either [String] or [[Number]] + for (let i = 0; i < prompt.length; i++) { + tags[`openai.request.prompt.${i}`] = normalizeStringOrTokenArray(prompt[i], true) + } + } + } + + // createEdit, createEmbedding, createModeration + if (payload.input) { + const normalized = normalizeStringOrTokenArray(payload.input, false) + tags['openai.request.input'] = truncateText(normalized) + openaiStore.input = normalized + } + + // createChatCompletion, createCompletion + if (payload.logit_bias !== null && typeof payload.logit_bias === 'object') { + for (const [tokenId, bias] of Object.entries(payload.logit_bias)) { + tags[`openai.request.logit_bias.${tokenId}`] = bias + } + } + + if (payload.stream) { + tags['openai.request.stream'] = payload.stream + } + + switch (methodName) { + case 
'createFineTune': + case 'fine_tuning.jobs.create': + case 'fine-tune.create': + createFineTuneRequestExtraction(tags, payload) + break + + case 'createImage': + case 'images.generate': + case 'createImageEdit': + case 'images.edit': + case 'createImageVariation': + case 'images.createVariation': + commonCreateImageRequestExtraction(tags, payload, openaiStore) + break + + case 'createChatCompletion': + case 'chat.completions.create': + createChatCompletionRequestExtraction(tags, payload, openaiStore) + break + + case 'createFile': + case 'files.create': + case 'retrieveFile': + case 'files.retrieve': + commonFileRequestExtraction(tags, payload) + break + + case 'createTranscription': + case 'audio.transcriptions.create': + case 'createTranslation': + case 'audio.translations.create': + commonCreateAudioRequestExtraction(tags, payload, openaiStore) + break + + case 'retrieveModel': + case 'models.retrieve': + retrieveModelRequestExtraction(tags, payload) + break + + case 'listFineTuneEvents': + case 'fine_tuning.jobs.listEvents': + case 'fine-tune.listEvents': + case 'retrieveFineTune': + case 'fine_tuning.jobs.retrieve': + case 'fine-tune.retrieve': + case 'deleteModel': + case 'models.del': + case 'cancelFineTune': + case 'fine_tuning.jobs.cancel': + case 'fine-tune.cancel': + commonLookupFineTuneRequestExtraction(tags, payload) + break + + case 'createEdit': + case 'edits.create': + createEditRequestExtraction(tags, payload, openaiStore) + break + } + + span.addTags(tags) + + ctx.currentStore = { ...store, span, openai: openaiStore } + + return ctx.currentStore + } + + asyncEnd (ctx) { + const { result } = ctx + const store = ctx.currentStore + + const span = store?.span + if (!span) return + + const error = !!span.context()._tags.error + + let headers, body, method, path + if (!error) { + headers = result.headers + body = result.data + method = result.request.method + path = result.request.path + } + + if (!error && headers?.constructor.name === 'Headers') { + 
headers = Object.fromEntries(headers) + } + const methodName = span._spanContext._tags['resource.name'] + + body = coerceResponseBody(body, methodName) + + const openaiStore = store.openai + + if (!error && (path?.startsWith('https://') || path?.startsWith('http://'))) { + // basic checking for if the path was set as a full URL + // not using a full regex as it will likely be "https://api.openai.com/..." + path = new URL(path).pathname + } + const endpoint = lookupOperationEndpoint(methodName, path) + + const tags = error + ? {} + : { + 'openai.request.endpoint': endpoint, + 'openai.request.method': method.toUpperCase(), + + 'openai.organization.id': body.organization_id, // only available in fine-tunes endpoints + 'openai.organization.name': headers['openai-organization'], + + 'openai.response.model': headers['openai-model'] || body.model, // specific model, often undefined + 'openai.response.id': body.id, // common creation value, numeric epoch + 'openai.response.deleted': body.deleted, // common boolean field in delete responses + + // The OpenAI API appears to use both created and created_at in different places + // Here we're conciously choosing to surface this inconsistency instead of normalizing + 'openai.response.created': body.created, + 'openai.response.created_at': body.created_at + } + + responseDataExtractionByMethod(methodName, tags, body, openaiStore) + span.addTags(tags) + + span.finish() + this.sendLog(methodName, span, tags, openaiStore, error) + this.sendMetrics(headers, body, endpoint, span._duration, error, tags) + } + + sendMetrics (headers, body, endpoint, duration, error, spanTags) { + const tags = [`error:${Number(!!error)}`] + if (error) { + this.metrics.increment('openai.request.error', 1, tags) + } else { + tags.push(`org:${headers['openai-organization']}`) + tags.push(`endpoint:${endpoint}`) // just "/v1/models", no method + tags.push(`model:${headers['openai-model'] || body.model}`) + } + + 
this.metrics.distribution('openai.request.duration', duration * 1000, tags) + + const promptTokens = spanTags['openai.response.usage.prompt_tokens'] + const promptTokensEstimated = spanTags['openai.response.usage.prompt_tokens_estimated'] + + const completionTokens = spanTags['openai.response.usage.completion_tokens'] + const completionTokensEstimated = spanTags['openai.response.usage.completion_tokens_estimated'] + + const totalTokens = spanTags['openai.response.usage.total_tokens'] + + if (!error) { + if (promptTokens != null) { + if (promptTokensEstimated) { + this.metrics.distribution( + 'openai.tokens.prompt', promptTokens, [...tags, 'openai.estimated:true']) + } else { + this.metrics.distribution('openai.tokens.prompt', promptTokens, tags) + } + } + + if (completionTokens != null) { + if (completionTokensEstimated) { + this.metrics.distribution( + 'openai.tokens.completion', completionTokens, [...tags, 'openai.estimated:true']) + } else { + this.metrics.distribution('openai.tokens.completion', completionTokens, tags) + } + } + + if (totalTokens != null) { + if (promptTokensEstimated || completionTokensEstimated) { + this.metrics.distribution( + 'openai.tokens.total', totalTokens, [...tags, 'openai.estimated:true']) + } else { + this.metrics.distribution('openai.tokens.total', totalTokens, tags) + } + } + } + + if (headers) { + if (headers['x-ratelimit-limit-requests']) { + this.metrics.gauge('openai.ratelimit.requests', Number(headers['x-ratelimit-limit-requests']), tags) + } + + if (headers['x-ratelimit-remaining-requests']) { + this.metrics.gauge( + 'openai.ratelimit.remaining.requests', Number(headers['x-ratelimit-remaining-requests']), tags + ) + } + + if (headers['x-ratelimit-limit-tokens']) { + this.metrics.gauge('openai.ratelimit.tokens', Number(headers['x-ratelimit-limit-tokens']), tags) + } + + if (headers['x-ratelimit-remaining-tokens']) { + this.metrics.gauge('openai.ratelimit.remaining.tokens', Number(headers['x-ratelimit-remaining-tokens']), 
/**
 * Counts the prompt tokens of a completion-style request from the recorded request data.
 *
 * Only 'chat.completions.create' and 'completions.create' are counted; any other
 * method name yields zero tokens.
 *
 * @param {string} methodName - Normalized OpenAI method name.
 * @param {Object} payload - Object holding the request `messages` (chat) or `prompt` (completions).
 *   Either may be absent: `bindStart` only records them when they are non-empty.
 * @param {string} model - Model name forwarded to `countTokens`.
 * @returns {{ promptTokens: number, promptEstimated: boolean }} Total token count, and
 *   whether at least one of the individual counts was an estimate.
 */
function countPromptTokens (methodName, payload, model) {
  let promptTokens = 0
  let promptEstimated = false

  const addTokensOf = (text) => {
    const { tokens, estimated } = countTokens(text, model)
    promptTokens += tokens
    // Sticky flag: one estimated part makes the whole total an estimate.
    promptEstimated = promptEstimated || estimated
  }

  if (methodName === 'chat.completions.create') {
    const messages = payload?.messages
    // Nothing was recorded for this request (e.g. empty `messages`): count stays at zero.
    if (!Array.isArray(messages)) return { promptTokens, promptEstimated }

    for (const message of messages) {
      const content = message?.content
      if (typeof content === 'string') {
        addTokensOf(content)
      } else if (Array.isArray(content)) {
        for (const part of content) {
          if (part?.type === 'text') addTokensOf(part.text)
          // unsupported token computation for image_url
          // as even though URL is a string, its true token count
          // is based on the image itself, something onerous to do client-side
        }
      }
    }
  } else if (methodName === 'completions.create') {
    const prompt = payload?.prompt
    // No prompt recorded: avoid counting the tokens of `undefined`.
    if (prompt == null) return { promptTokens, promptEstimated }

    const prompts = Array.isArray(prompt) ? prompt : [prompt]
    for (const p of prompts) addTokensOf(p)
  }

  return { promptTokens, promptEstimated }
}
/**
 * Counts the tokens of `content` for `model`.
 *
 * Uses the optional tiktoken encoder when it could be loaded, and falls back to
 * `estimateTokens` when it is unavailable or fails.
 *
 * @param {*} content - Text to count.
 * @param {string} model - Model name used to pick the encoding.
 * @returns {{ tokens: number, estimated: boolean }} `estimated` is true when the fallback was used.
 */
function countTokens (content, model) {
  if (encodingForModel) {
    try {
      // try using tiktoken if it was available
      const encoder = encodingForModel(model)
      try {
        return { tokens: encoder.encode(content).length, estimated: false }
      } finally {
        // Release the encoder even when `encode` throws; otherwise it is never freed.
        encoder.free()
      }
    } catch {
      // possible errors from tiktoken:
      // * model not available for token counts
      // * issue encoding content
    }
  }

  return {
    tokens: estimateTokens(content),
    estimated: true
  }
}
tags['openai.request.response_format'] = payload.response_format + tags['openai.request.language'] = payload.language +} + +function responseDataExtractionByMethod (methodName, tags, body, openaiStore) { + switch (methodName) { + case 'createModeration': + case 'moderations.create': + createModerationResponseExtraction(tags, body) + break + + case 'createCompletion': + case 'completions.create': + case 'createChatCompletion': + case 'chat.completions.create': + case 'createEdit': + case 'edits.create': + commonCreateResponseExtraction(tags, body, openaiStore, methodName) + break + + case 'listFiles': + case 'files.list': + case 'listFineTunes': + case 'fine_tuning.jobs.list': + case 'fine-tune.list': + case 'listFineTuneEvents': + case 'fine_tuning.jobs.listEvents': + case 'fine-tune.listEvents': + commonListCountResponseExtraction(tags, body) + break + + case 'createEmbedding': + case 'embeddings.create': + createEmbeddingResponseExtraction(tags, body, openaiStore) + break + + case 'createFile': + case 'files.create': + case 'retrieveFile': + case 'files.retrieve': + createRetrieveFileResponseExtraction(tags, body) + break + + case 'deleteFile': + case 'files.del': + deleteFileResponseExtraction(tags, body) + break + + case 'downloadFile': + case 'files.retrieveContent': + case 'files.content': + downloadFileResponseExtraction(tags, body) + break + + case 'createFineTune': + case 'fine_tuning.jobs.create': + case 'fine-tune.create': + case 'retrieveFineTune': + case 'fine_tuning.jobs.retrieve': + case 'fine-tune.retrieve': + case 'cancelFineTune': + case 'fine_tuning.jobs.cancel': + case 'fine-tune.cancel': + commonFineTuneResponseExtraction(tags, body) + break + + case 'createTranscription': + case 'audio.transcriptions.create': + case 'createTranslation': + case 'audio.translations.create': + createAudioResponseExtraction(tags, body) + break + + case 'createImage': + case 'images.generate': + case 'createImageEdit': + case 'images.edit': + case 
/**
 * Tags a retrieve-model response: ownership fields plus the fields of the first
 * permission entry, when there is one.
 *
 * @param {Object} tags - Tag map to populate (mutated in place).
 * @param {Object} body - Response body.
 */
function retrieveModelResponseExtraction (tags, body) {
  tags['openai.response.owned_by'] = body.owned_by
  tags['openai.response.parent'] = body.parent
  tags['openai.response.root'] = body.root

  // Only the first permission entry is tagged. An absent or empty `permission`
  // leaves the permission tags unset instead of throwing.
  const permission = body.permission?.[0]
  if (!permission) return

  const permissionFields = [
    'id',
    'created',
    'allow_create_engine',
    'allow_sampling',
    'allow_logprobs',
    'allow_search_indices',
    'allow_view',
    'allow_fine_tuning',
    'organization',
    'group',
    'is_blocking'
  ]

  for (const field of permissionFields) {
    tags[`openai.response.permission.${field}`] = permission[field]
  }
}
/**
 * Tags a create-fine-tune request.
 *
 * @param {Object} tags - Tag map to populate (mutated in place).
 * @param {Object} body - Request payload; each listed field is copied as-is, and
 *   `classification_betas` is reduced to a count.
 */
function createFineTuneRequestExtraction (tags, body) {
  // Fields copied verbatim under the `openai.request.` prefix.
  const copiedFields = [
    'training_file',
    'validation_file',
    'n_epochs',
    'batch_size',
    'learning_rate_multiplier',
    'prompt_loss_weight',
    'compute_classification_metrics',
    'classification_n_classes',
    'classification_positive_class'
  ]

  for (const field of copiedFields) {
    tags[`openai.request.${field}`] = body[field]
  }

  tags['openai.request.classification_betas_count'] = defensiveArrayLength(body.classification_betas)
}
/**
 * Tags the size of a downloaded file.
 *
 * the OpenAI package appears to stream the content download then provide it all as a singular string
 *
 * @param {Object} tags - Tag map to populate (mutated in place).
 * @param {Object} body - Coerced response body; the downloaded content is read from `body.file`.
 */
function downloadFileResponseExtraction (tags, body) {
  const file = body.file
  if (!file) return

  // A string's `.length` counts UTF-16 code units, which differs from the byte
  // count as soon as the content is not pure ASCII. Non-string values keep
  // reporting their own `.length`.
  // NOTE(review): assumes the string was decoded as UTF-8 - confirm against the client.
  tags['openai.response.total_bytes'] = typeof file === 'string'
    ? Buffer.byteLength(file)
    : file.length
}
/**
 * Tags a create-embedding response: token usage, number of embeddings, and the
 * length of each embedding.
 *
 * @param {Object} tags - Tag map to populate (mutated in place).
 * @param {Object} body - Response body.
 * @param {Object} openaiStore - Per-request store forwarded to `usageExtraction`.
 */
function createEmbeddingResponseExtraction (tags, body, openaiStore) {
  // usageExtraction takes (tags, body, methodName, openaiStore). No method name is
  // available here, so pass `undefined` explicitly rather than letting the store
  // land in the `methodName` slot.
  usageExtraction(tags, body, undefined, openaiStore)

  if (!body.data) return

  tags['openai.response.embeddings_count'] = body.data.length
  for (let i = 0; i < body.data.length; i++) {
    // An entry without an `embedding` yields an undefined tag instead of throwing.
    tags[`openai.response.embedding.${i}.embedding_length`] = body.data[i]?.embedding?.length
  }
}
/**
 * Tags token usage for a response.
 *
 * createCompletion, createChatCompletion, createEdit, createEmbedding
 *
 * When the response carries a `usage` object its values are used as-is. Otherwise,
 * for 'chat.completions.create' and 'completions.create' responses that name a
 * model, the counts are computed locally and flagged as estimated when the
 * counting helpers report an estimate. In every other case the counters keep
 * their initial value of 0.
 *
 * @param {Object} tags - Tag map to populate (mutated in place).
 * @param {Object} [body] - Response body; a missing body is treated as "no usage reported".
 * @param {string} [methodName] - Normalized OpenAI method name.
 * @param {Object} [openaiStore] - Per-request store, used as the request payload for prompt counting.
 */
function usageExtraction (tags, body, methodName, openaiStore) {
  let promptTokens = 0
  let completionTokens = 0
  let totalTokens = 0

  if (body?.usage) {
    promptTokens = body.usage.prompt_tokens
    completionTokens = body.usage.completion_tokens
    totalTokens = body.usage.total_tokens
  } else if (body?.model && ['chat.completions.create', 'completions.create'].includes(methodName)) {
    // estimate tokens based on method name for completions and chat completions
    const { model } = body

    // prompt tokens
    const promptCount = countPromptTokens(methodName, openaiStore, model)
    promptTokens = promptCount.promptTokens

    // completion tokens
    const completionCount = countCompletionTokens(body, model)
    completionTokens = completionCount.completionTokens

    // total tokens
    totalTokens = promptTokens + completionTokens
    if (promptCount.promptEstimated) tags['openai.response.usage.prompt_tokens_estimated'] = true
    if (completionCount.completionEstimated) tags['openai.response.usage.completion_tokens_estimated'] = true
  }

  // A `usage` object may omit individual fields; omitted fields are not tagged.
  if (promptTokens != null) tags['openai.response.usage.prompt_tokens'] = promptTokens
  if (completionTokens != null) tags['openai.response.usage.completion_tokens'] = completionTokens
  if (totalTokens != null) tags['openai.response.usage.total_tokens'] = totalTokens
}
/**
 * Normalizes a response body into something properties can safely be read from.
 *
 * The server almost always responds with JSON.
 *
 * - File-download methods: the raw body is wrapped as `{ file: body }`.
 * - Strings: parsed as JSON when possible, returned unchanged otherwise.
 * - Objects: returned as-is.
 * - Anything else, including `null` (raw or parsed from the string "null"): `{}`.
 *
 * @param {*} body - Raw response body.
 * @param {string} methodName - Normalized OpenAI method name.
 * @returns {*} The coerced body; never `null`.
 */
function coerceResponseBody (body, methodName) {
  switch (methodName) {
    case 'downloadFile':
    case 'files.retrieveContent':
    case 'files.content':
      return { file: body }
  }

  if (typeof body === 'string') {
    try {
      const parsed = JSON.parse(body)
      // "null" is valid JSON; callers read properties from the result.
      return parsed === null ? {} : parsed
    } catch {
      return body
    }
  }

  // `typeof null` is 'object', so null has to be excluded explicitly.
  if (body !== null && typeof body === 'object') {
    return body
  }

  return {}
}
/**
 * This function essentially normalizes the OpenAI method interface. Many methods accept
 * a single object argument. The remaining ones take individual arguments. This function
 * turns the individual arguments into an object to make extracting properties consistent.
 *
 * @param {string} methodName - Normalized OpenAI method name.
 * @param {Array} args - Arguments the instrumented method was called with.
 * @returns {Object} Request payload. An empty object is returned when a
 *   single-object method was called without an argument, because the caller
 *   reads properties off the result unconditionally.
 */
function normalizeRequestPayload (methodName, args) {
  switch (methodName) {
    case 'listModels':
    case 'models.list':
    case 'listFiles':
    case 'files.list':
    case 'listFineTunes':
    case 'fine_tuning.jobs.list':
    case 'fine-tune.list':
      // no argument
      return {}

    case 'retrieveModel':
    case 'models.retrieve':
      return { id: args[0] }

    case 'createFile':
      return {
        file: args[0],
        purpose: args[1]
      }

    case 'deleteFile':
    case 'files.del':
    case 'retrieveFile':
    case 'files.retrieve':
    case 'downloadFile':
    case 'files.retrieveContent':
    case 'files.content':
      return { file_id: args[0] }

    case 'listFineTuneEvents':
    case 'fine_tuning.jobs.listEvents':
    case 'fine-tune.listEvents':
      return {
        fine_tune_id: args[0],
        stream: args[1] // undocumented
      }

    case 'retrieveFineTune':
    case 'fine_tuning.jobs.retrieve':
    case 'fine-tune.retrieve':
    case 'deleteModel':
    case 'models.del':
    case 'cancelFineTune':
    case 'fine_tuning.jobs.cancel':
    case 'fine-tune.cancel':
      return { fine_tune_id: args[0] }

    case 'createImageEdit':
      return {
        file: args[0],
        prompt: args[1], // Note: order of prompt/mask in Node.js lib differs from public docs
        mask: args[2],
        n: args[3],
        size: args[4],
        response_format: args[5],
        user: args[6]
      }

    case 'createImageVariation':
      return {
        file: args[0],
        n: args[1],
        size: args[2],
        response_format: args[3],
        user: args[4]
      }

    case 'createTranscription':
    case 'createTranslation':
      return {
        file: args[0],
        model: args[1],
        prompt: args[2],
        response_format: args[3],
        temperature: args[4],
        language: args[5] // only used for createTranscription
      }
  }

  // Remaining OpenAI methods take a single object argument.
  // Fall back to an empty payload when the caller passed none.
  return args[0] || {}
}
+6,7 @@ const dc = require('dc-polyfill') module.exports = { bodyParser: dc.channel('datadog:body-parser:read:finish'), cookieParser: dc.channel('datadog:cookie-parser:read:finish'), + multerParser: dc.channel('datadog:multer:read:finish'), startGraphqlResolve: dc.channel('datadog:graphql:resolver:start'), graphqlMiddlewareChannel: dc.tracingChannel('datadog:apollo:middleware'), apolloChannel: dc.tracingChannel('datadog:apollo:request'), @@ -28,5 +29,6 @@ module.exports = { mysql2OuterQueryStart: dc.channel('datadog:mysql2:outerquery:start'), wafRunFinished: dc.channel('datadog:waf:run:finish'), fsOperationStart: dc.channel('apm:fs:operation:start'), - expressMiddlewareError: dc.channel('apm:express:middleware:error') + expressMiddlewareError: dc.channel('apm:express:middleware:error'), + childProcessExecutionTracingChannel: dc.tracingChannel('datadog:child_process:execution') } diff --git a/packages/dd-trace/src/appsec/iast/analyzers/header-injection-analyzer.js b/packages/dd-trace/src/appsec/iast/analyzers/header-injection-analyzer.js index 62330e87a07..a80af8b7646 100644 --- a/packages/dd-trace/src/appsec/iast/analyzers/header-injection-analyzer.js +++ b/packages/dd-trace/src/appsec/iast/analyzers/header-injection-analyzer.js @@ -6,7 +6,6 @@ const { getNodeModulesPaths } = require('../path-line') const { HEADER_NAME_VALUE_SEPARATOR } = require('../vulnerabilities-formatter/constants') const { getRanges } = require('../taint-tracking/operations') const { - HTTP_REQUEST_COOKIE_NAME, HTTP_REQUEST_COOKIE_VALUE, HTTP_REQUEST_HEADER_VALUE } = require('../taint-tracking/source-types') @@ -45,13 +44,7 @@ class HeaderInjectionAnalyzer extends InjectionAnalyzer { if (this.isExcludedHeaderName(lowerCasedHeaderName) || typeof value !== 'string') return const ranges = getRanges(iastContext, value) - if (ranges?.length > 0) { - return !(this.isCookieExclusion(lowerCasedHeaderName, ranges) || - this.isSameHeaderExclusion(lowerCasedHeaderName, ranges) || - 
this.isAccessControlAllowExclusion(lowerCasedHeaderName, ranges)) - } - - return false + return ranges?.length > 0 && !this.shouldIgnoreHeader(lowerCasedHeaderName, ranges) } _getEvidence (headerInfo, iastContext) { @@ -75,28 +68,52 @@ class HeaderInjectionAnalyzer extends InjectionAnalyzer { return EXCLUDED_HEADER_NAMES.includes(name) } - isCookieExclusion (name, ranges) { - if (name === 'set-cookie') { - return ranges - .every(range => range.iinfo.type === HTTP_REQUEST_COOKIE_VALUE || range.iinfo.type === HTTP_REQUEST_COOKIE_NAME) - } + isAllRangesFromHeader (ranges, headerName) { + return ranges + .every(range => + range.iinfo.type === HTTP_REQUEST_HEADER_VALUE && range.iinfo.parameterName?.toLowerCase() === headerName + ) + } - return false + isAllRangesFromSource (ranges, source) { + return ranges + .every(range => range.iinfo.type === source) } + /** + * Exclude access-control-allow-*: when the header starts with access-control-allow- and the + * source of the tainted range is a request header + */ isAccessControlAllowExclusion (name, ranges) { if (name?.startsWith('access-control-allow-')) { - return ranges - .every(range => range.iinfo.type === HTTP_REQUEST_HEADER_VALUE) + return this.isAllRangesFromSource(ranges, HTTP_REQUEST_HEADER_VALUE) } return false } + /** Exclude when the header is reflected from the request */ isSameHeaderExclusion (name, ranges) { return ranges.length === 1 && name === ranges[0].iinfo.parameterName?.toLowerCase() } + shouldIgnoreHeader (headerName, ranges) { + switch (headerName) { + case 'set-cookie': + /** Exclude set-cookie header if the source of all the tainted ranges are cookies */ + return this.isAllRangesFromSource(ranges, HTTP_REQUEST_COOKIE_VALUE) + case 'pragma': + /** Ignore pragma headers when the source is the cache control header. 
*/ + return this.isAllRangesFromHeader(ranges, 'cache-control') + case 'transfer-encoding': + case 'content-encoding': + /** Ignore transfer and content encoding headers when the source is the accept encoding header. */ + return this.isAllRangesFromHeader(ranges, 'accept-encoding') + } + + return this.isAccessControlAllowExclusion(headerName, ranges) || this.isSameHeaderExclusion(headerName, ranges) + } + _getExcludedPaths () { return EXCLUDED_PATHS } diff --git a/packages/dd-trace/src/appsec/iast/taint-tracking/plugin.js b/packages/dd-trace/src/appsec/iast/taint-tracking/plugin.js index 48902323bec..ed46cbe5f2e 100644 --- a/packages/dd-trace/src/appsec/iast/taint-tracking/plugin.js +++ b/packages/dd-trace/src/appsec/iast/taint-tracking/plugin.js @@ -23,18 +23,26 @@ class TaintTrackingPlugin extends SourceIastPlugin { constructor () { super() this._type = 'taint-tracking' + this._taintedURLs = new WeakMap() } onConfigure () { + const onRequestBody = ({ req }) => { + const iastContext = getIastContext(storage.getStore()) + if (iastContext && iastContext.body !== req.body) { + this._taintTrackingHandler(HTTP_REQUEST_BODY, req, 'body', iastContext) + iastContext.body = req.body + } + } + this.addSub( { channelName: 'datadog:body-parser:read:finish', tag: HTTP_REQUEST_BODY }, - ({ req }) => { - const iastContext = getIastContext(storage.getStore()) - if (iastContext && iastContext.body !== req.body) { - this._taintTrackingHandler(HTTP_REQUEST_BODY, req, 'body', iastContext) - iastContext.body = req.body - } - } + onRequestBody + ) + + this.addSub( + { channelName: 'datadog:multer:read:finish', tag: HTTP_REQUEST_BODY }, + onRequestBody ) this.addSub( @@ -81,6 +89,46 @@ class TaintTrackingPlugin extends SourceIastPlugin { } ) + const urlResultTaintedProperties = ['host', 'origin', 'hostname'] + this.addSub( + { channelName: 'datadog:url:parse:finish' }, + ({ input, base, parsed, isURL }) => { + const iastContext = getIastContext(storage.getStore()) + let ranges + + if 
(base) { + ranges = getRanges(iastContext, base) + } else { + ranges = getRanges(iastContext, input) + } + + if (ranges?.length) { + if (isURL) { + this._taintedURLs.set(parsed, ranges[0]) + } else { + urlResultTaintedProperties.forEach(param => { + this._taintTrackingHandler(ranges[0].iinfo.type, parsed, param, iastContext) + }) + } + } + } + ) + + this.addSub( + { channelName: 'datadog:url:getter:finish' }, + (context) => { + if (!urlResultTaintedProperties.includes(context.property)) return + + const origRange = this._taintedURLs.get(context.urlObject) + if (!origRange) return + + const iastContext = getIastContext(storage.getStore()) + if (!iastContext) return + + context.result = + newTaintedString(iastContext, context.result, origRange.iinfo.parameterName, origRange.iinfo.type) + }) + // this is a special case to increment INSTRUMENTED_SOURCE metric for header this.addInstrumentedSource('http', [HTTP_REQUEST_HEADER_VALUE, HTTP_REQUEST_HEADER_NAME]) } diff --git a/packages/dd-trace/src/appsec/iast/vulnerability-reporter.js b/packages/dd-trace/src/appsec/iast/vulnerability-reporter.js index cc25d51b1e9..e2d1619b118 100644 --- a/packages/dd-trace/src/appsec/iast/vulnerability-reporter.js +++ b/packages/dd-trace/src/appsec/iast/vulnerability-reporter.js @@ -1,10 +1,11 @@ 'use strict' -const { MANUAL_KEEP } = require('../../../../../ext/tags') const LRU = require('lru-cache') const vulnerabilitiesFormatter = require('./vulnerabilities-formatter') const { IAST_ENABLED_TAG_KEY, IAST_JSON_TAG_KEY } = require('./tags') const standalone = require('../standalone') +const { SAMPLING_MECHANISM_APPSEC } = require('../../constants') +const { keepTrace } = require('../../priority_sampler') const VULNERABILITIES_KEY = 'vulnerabilities' const VULNERABILITY_HASHES_MAX_SIZE = 1000 @@ -56,9 +57,10 @@ function sendVulnerabilities (vulnerabilities, rootSpan) { const tags = {} // TODO: Store this outside of the span and set the tag in the exporter. 
tags[IAST_JSON_TAG_KEY] = JSON.stringify(jsonToSend) - tags[MANUAL_KEEP] = 'true' span.addTags(tags) + keepTrace(span, SAMPLING_MECHANISM_APPSEC) + standalone.sample(span) if (!rootSpan) span.finish() diff --git a/packages/dd-trace/src/appsec/index.js b/packages/dd-trace/src/appsec/index.js index f3656e459e8..f4f9a4db036 100644 --- a/packages/dd-trace/src/appsec/index.js +++ b/packages/dd-trace/src/appsec/index.js @@ -6,6 +6,7 @@ const remoteConfig = require('./remote_config') const { bodyParser, cookieParser, + multerParser, incomingHttpRequestStart, incomingHttpRequestEnd, passportVerify, @@ -58,6 +59,7 @@ function enable (_config) { apiSecuritySampler.configure(_config.appsec) bodyParser.subscribe(onRequestBodyParsed) + multerParser.subscribe(onRequestBodyParsed) cookieParser.subscribe(onRequestCookieParser) incomingHttpRequestStart.subscribe(incomingHttpStartTranslator) incomingHttpRequestEnd.subscribe(incomingHttpEndTranslator) @@ -299,6 +301,7 @@ function disable () { // Channel#unsubscribe() is undefined for non active channels if (bodyParser.hasSubscribers) bodyParser.unsubscribe(onRequestBodyParsed) + if (multerParser.hasSubscribers) multerParser.unsubscribe(onRequestBodyParsed) if (cookieParser.hasSubscribers) cookieParser.unsubscribe(onRequestCookieParser) if (incomingHttpRequestStart.hasSubscribers) incomingHttpRequestStart.unsubscribe(incomingHttpStartTranslator) if (incomingHttpRequestEnd.hasSubscribers) incomingHttpRequestEnd.unsubscribe(incomingHttpEndTranslator) diff --git a/packages/dd-trace/src/appsec/rasp/command_injection.js b/packages/dd-trace/src/appsec/rasp/command_injection.js new file mode 100644 index 00000000000..8d6d977aace --- /dev/null +++ b/packages/dd-trace/src/appsec/rasp/command_injection.js @@ -0,0 +1,49 @@ +'use strict' + +const { childProcessExecutionTracingChannel } = require('../channels') +const { RULE_TYPES, handleResult } = require('./utils') +const { storage } = require('../../../../datadog-core') +const addresses = 
require('../addresses') +const waf = require('../waf') + +let config + +function enable (_config) { + config = _config + + childProcessExecutionTracingChannel.subscribe({ + start: analyzeCommandInjection + }) +} + +function disable () { + if (childProcessExecutionTracingChannel.start.hasSubscribers) { + childProcessExecutionTracingChannel.unsubscribe({ + start: analyzeCommandInjection + }) + } +} + +function analyzeCommandInjection ({ file, fileArgs, shell, abortController }) { + if (!file || !shell) return + + const store = storage.getStore() + const req = store?.req + if (!req) return + + const commandParams = fileArgs ? [file, ...fileArgs] : file + + const persistent = { + [addresses.SHELL_COMMAND]: commandParams + } + + const result = waf.run({ persistent }, req, RULE_TYPES.COMMAND_INJECTION) + + const res = store?.res + handleResult(result, req, res, abortController, config) +} + +module.exports = { + enable, + disable +} diff --git a/packages/dd-trace/src/appsec/rasp/index.js b/packages/dd-trace/src/appsec/rasp/index.js index d5a1312872a..4a65518495d 100644 --- a/packages/dd-trace/src/appsec/rasp/index.js +++ b/packages/dd-trace/src/appsec/rasp/index.js @@ -6,6 +6,7 @@ const { block, isBlocked } = require('../blocking') const ssrf = require('./ssrf') const sqli = require('./sql_injection') const lfi = require('./lfi') +const cmdi = require('./command_injection') const { DatadogRaspAbortError } = require('./utils') @@ -95,6 +96,7 @@ function enable (config) { ssrf.enable(config) sqli.enable(config) lfi.enable(config) + cmdi.enable(config) process.on('uncaughtExceptionMonitor', handleUncaughtExceptionMonitor) expressMiddlewareError.subscribe(blockOnDatadogRaspAbortError) @@ -104,6 +106,7 @@ function disable () { ssrf.disable() sqli.disable() lfi.disable() + cmdi.disable() process.off('uncaughtExceptionMonitor', handleUncaughtExceptionMonitor) if (expressMiddlewareError.hasSubscribers) expressMiddlewareError.unsubscribe(blockOnDatadogRaspAbortError) diff --git 
a/packages/dd-trace/src/appsec/rasp/utils.js b/packages/dd-trace/src/appsec/rasp/utils.js index c4ee4f55c3f..bdf3596209e 100644 --- a/packages/dd-trace/src/appsec/rasp/utils.js +++ b/packages/dd-trace/src/appsec/rasp/utils.js @@ -12,9 +12,10 @@ if (abortOnUncaughtException) { } const RULE_TYPES = { - SSRF: 'ssrf', + COMMAND_INJECTION: 'command_injection', + LFI: 'lfi', SQL_INJECTION: 'sql_injection', - LFI: 'lfi' + SSRF: 'ssrf' } class DatadogRaspAbortError extends Error { diff --git a/packages/dd-trace/src/appsec/recommended.json b/packages/dd-trace/src/appsec/recommended.json index 158c33a8ccd..01156e6f206 100644 --- a/packages/dd-trace/src/appsec/recommended.json +++ b/packages/dd-trace/src/appsec/recommended.json @@ -1,7 +1,7 @@ { "version": "2.2", "metadata": { - "rules_version": "1.13.1" + "rules_version": "1.13.2" }, "rules": [ { @@ -6335,7 +6335,6 @@ { "id": "rasp-934-100", "name": "Server-side request forgery exploit", - "enabled": false, "tags": { "type": "ssrf", "category": "vulnerability_trigger", @@ -6384,7 +6383,6 @@ { "id": "rasp-942-100", "name": "SQL injection exploit", - "enabled": false, "tags": { "type": "sql_injection", "category": "vulnerability_trigger", @@ -6424,7 +6422,7 @@ } ] }, - "operator": "sqli_detector" + "operator": "sqli_detector@v2" } ], "transformers": [], diff --git a/packages/dd-trace/src/appsec/remote_config/capabilities.js b/packages/dd-trace/src/appsec/remote_config/capabilities.js index 3eda140a986..18c11a92104 100644 --- a/packages/dd-trace/src/appsec/remote_config/capabilities.js +++ b/packages/dd-trace/src/appsec/remote_config/capabilities.js @@ -20,6 +20,7 @@ module.exports = { ASM_RASP_SQLI: 1n << 21n, ASM_RASP_LFI: 1n << 22n, ASM_RASP_SSRF: 1n << 23n, + ASM_RASP_SHI: 1n << 24n, APM_TRACING_SAMPLE_RULES: 1n << 29n, ASM_ENDPOINT_FINGERPRINT: 1n << 32n, ASM_NETWORK_FINGERPRINT: 1n << 34n, diff --git a/packages/dd-trace/src/appsec/remote_config/index.js b/packages/dd-trace/src/appsec/remote_config/index.js index 
2b7eea57c82..9f0869351af 100644 --- a/packages/dd-trace/src/appsec/remote_config/index.js +++ b/packages/dd-trace/src/appsec/remote_config/index.js @@ -83,6 +83,7 @@ function enableWafUpdate (appsecConfig) { rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SQLI, true) rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SSRF, true) rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_LFI, true) + rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SHI, true) } // TODO: delete noop handlers and kPreUpdate and replace with batched handlers @@ -114,6 +115,7 @@ function disableWafUpdate () { rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SQLI, false) rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SSRF, false) rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_LFI, false) + rc.updateCapabilities(RemoteConfigCapabilities.ASM_RASP_SHI, false) rc.removeProductHandler('ASM_DATA') rc.removeProductHandler('ASM_DD') diff --git a/packages/dd-trace/src/appsec/reporter.js b/packages/dd-trace/src/appsec/reporter.js index dd2bde9fb06..3cd23b1f003 100644 --- a/packages/dd-trace/src/appsec/reporter.js +++ b/packages/dd-trace/src/appsec/reporter.js @@ -13,8 +13,9 @@ const { getRequestMetrics } = require('./telemetry') const zlib = require('zlib') -const { MANUAL_KEEP } = require('../../../../ext/tags') const standalone = require('./standalone') +const { SAMPLING_MECHANISM_APPSEC } = require('../constants') +const { keepTrace } = require('../priority_sampler') // default limiter, configurable with setRateLimit() let limiter = new Limiter(100) @@ -96,8 +97,6 @@ function reportWafInit (wafVersion, rulesVersion, diagnosticsRules = {}) { metricsQueue.set('_dd.appsec.event_rules.errors', JSON.stringify(diagnosticsRules.errors)) } - metricsQueue.set(MANUAL_KEEP, 'true') - incrementWafInitMetric(wafVersion, rulesVersion) } @@ -129,7 +128,7 @@ function reportAttack (attackData) { } if (limiter.isAllowed()) { - newTags[MANUAL_KEEP] = 'true' + 
keepTrace(rootSpan, SAMPLING_MECHANISM_APPSEC) standalone.sample(rootSpan) } @@ -184,6 +183,8 @@ function finishRequest (req, res) { if (metricsQueue.size) { rootSpan.addTags(Object.fromEntries(metricsQueue)) + keepTrace(rootSpan, SAMPLING_MECHANISM_APPSEC) + standalone.sample(rootSpan) metricsQueue.clear() diff --git a/packages/dd-trace/src/appsec/sdk/track_event.js b/packages/dd-trace/src/appsec/sdk/track_event.js index 36c40093b19..e95081314de 100644 --- a/packages/dd-trace/src/appsec/sdk/track_event.js +++ b/packages/dd-trace/src/appsec/sdk/track_event.js @@ -2,10 +2,11 @@ const log = require('../../log') const { getRootSpan } = require('./utils') -const { MANUAL_KEEP } = require('../../../../../ext/tags') const { setUserTags } = require('./set_user') const standalone = require('../standalone') const waf = require('../waf') +const { SAMPLING_MECHANISM_APPSEC } = require('../../constants') +const { keepTrace } = require('../../priority_sampler') function trackUserLoginSuccessEvent (tracer, user, metadata) { // TODO: better user check here and in _setUser() ? 
@@ -55,9 +56,10 @@ function trackEvent (eventName, fields, sdkMethodName, rootSpan, mode) { return } + keepTrace(rootSpan, SAMPLING_MECHANISM_APPSEC) + const tags = { - [`appsec.events.${eventName}.track`]: 'true', - [MANUAL_KEEP]: 'true' + [`appsec.events.${eventName}.track`]: 'true' } if (mode === 'sdk') { diff --git a/packages/dd-trace/src/azure_metadata.js b/packages/dd-trace/src/azure_metadata.js new file mode 100644 index 00000000000..94c29c9dd16 --- /dev/null +++ b/packages/dd-trace/src/azure_metadata.js @@ -0,0 +1,120 @@ +'use strict' + +// eslint-disable-next-line max-len +// Modeled after https://github.com/DataDog/libdatadog/blob/f3994857a59bb5679a65967138c5a3aec418a65f/ddcommon/src/azure_app_services.rs + +const os = require('os') +const { getIsAzureFunction } = require('./serverless') + +function extractSubscriptionID (ownerName) { + if (ownerName !== undefined) { + const subId = ownerName.split('+')[0].trim() + if (subId.length > 0) { + return subId + } + } + return undefined +} + +function extractResourceGroup (ownerName) { + return /.+\+(.+)-.+webspace(-Linux)?/.exec(ownerName)?.[1] +} + +function buildResourceID (subscriptionID, siteName, resourceGroup) { + if (subscriptionID === undefined || siteName === undefined || resourceGroup === undefined) { + return undefined + } + return `/subscriptions/${subscriptionID}/resourcegroups/${resourceGroup}/providers/microsoft.web/sites/${siteName}` + .toLowerCase() +} + +function trimObject (obj) { + Object.entries(obj) + .filter(([_, value]) => value === undefined) + .forEach(([key, _]) => { delete obj[key] }) + return obj +} + +function buildMetadata () { + const { + COMPUTERNAME, + DD_AAS_DOTNET_EXTENSION_VERSION, + FUNCTIONS_EXTENSION_VERSION, + FUNCTIONS_WORKER_RUNTIME, + FUNCTIONS_WORKER_RUNTIME_VERSION, + WEBSITE_INSTANCE_ID, + WEBSITE_OWNER_NAME, + WEBSITE_OS, + WEBSITE_RESOURCE_GROUP, + WEBSITE_SITE_NAME + } = process.env + + const subscriptionID = extractSubscriptionID(WEBSITE_OWNER_NAME) + + const 
siteName = WEBSITE_SITE_NAME + + const [siteKind, siteType] = getIsAzureFunction() + ? ['functionapp', 'function'] + : ['app', 'app'] + + const resourceGroup = WEBSITE_RESOURCE_GROUP ?? extractResourceGroup(WEBSITE_OWNER_NAME) + + return trimObject({ + extensionVersion: DD_AAS_DOTNET_EXTENSION_VERSION, + functionRuntimeVersion: FUNCTIONS_EXTENSION_VERSION, + instanceID: WEBSITE_INSTANCE_ID, + instanceName: COMPUTERNAME, + operatingSystem: WEBSITE_OS ?? os.platform(), + resourceGroup, + resourceID: buildResourceID(subscriptionID, siteName, resourceGroup), + runtime: FUNCTIONS_WORKER_RUNTIME, + runtimeVersion: FUNCTIONS_WORKER_RUNTIME_VERSION, + siteKind, + siteName, + siteType, + subscriptionID + }) +} + +function getAzureAppMetadata () { + // DD_AZURE_APP_SERVICES is an environment variable introduced by the .NET APM team and is set automatically for + // anyone using the Datadog APM Extensions (.NET, Java, or Node) for Windows Azure App Services + // eslint-disable-next-line max-len + // See: https://github.com/DataDog/datadog-aas-extension/blob/01f94b5c28b7fa7a9ab264ca28bd4e03be603900/node/src/applicationHost.xdt#L20-L21 + return process.env.DD_AZURE_APP_SERVICES !== undefined ? buildMetadata() : undefined +} + +function getAzureFunctionMetadata () { + return getIsAzureFunction() ? 
buildMetadata() : undefined +} + +// eslint-disable-next-line max-len +// Modeled after https://github.com/DataDog/libdatadog/blob/92272e90a7919f07178f3246ef8f82295513cfed/profiling/src/exporter/mod.rs#L187 +// eslint-disable-next-line max-len +// and https://github.com/DataDog/libdatadog/blob/f3994857a59bb5679a65967138c5a3aec418a65f/trace-utils/src/trace_utils.rs#L533 +function getAzureTagsFromMetadata (metadata) { + if (metadata === undefined) { + return {} + } + return trimObject({ + 'aas.environment.extension_version': metadata.extensionVersion, + 'aas.environment.function_runtime': metadata.functionRuntimeVersion, + 'aas.environment.instance_id': metadata.instanceID, + 'aas.environment.instance_name': metadata.instanceName, + 'aas.environment.os': metadata.operatingSystem, + 'aas.environment.runtime': metadata.runtime, + 'aas.environment.runtime_version': metadata.runtimeVersion, + 'aas.resource.group': metadata.resourceGroup, + 'aas.resource.id': metadata.resourceID, + 'aas.site.kind': metadata.siteKind, + 'aas.site.name': metadata.siteName, + 'aas.site.type': metadata.siteType, + 'aas.subscription.id': metadata.subscriptionID + }) +} + +module.exports = { + getAzureAppMetadata, + getAzureFunctionMetadata, + getAzureTagsFromMetadata +} diff --git a/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/index.js b/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/index.js new file mode 100644 index 00000000000..97323d02407 --- /dev/null +++ b/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/index.js @@ -0,0 +1,97 @@ +'use strict' + +const { join } = require('path') +const { Worker } = require('worker_threads') +const { randomUUID } = require('crypto') +const log = require('../../log') + +const probeIdToResolveBreakpointSet = new Map() +const probeIdToResolveBreakpointHit = new Map() + +class TestVisDynamicInstrumentation { + constructor () { + this.worker = null + this._readyPromise = new Promise(resolve => { + this._onReady = resolve 
+ }) + this.breakpointSetChannel = new MessageChannel() + this.breakpointHitChannel = new MessageChannel() + } + + // Return 3 elements: + // 1. Snapshot ID + // 2. Promise that's resolved when the breakpoint is set + // 3. Promise that's resolved when the breakpoint is hit + addLineProbe ({ file, line }) { + const snapshotId = randomUUID() + const probeId = randomUUID() + + this.breakpointSetChannel.port2.postMessage({ + snapshotId, + probe: { id: probeId, file, line } + }) + + return [ + snapshotId, + new Promise(resolve => { + probeIdToResolveBreakpointSet.set(probeId, resolve) + }), + new Promise(resolve => { + probeIdToResolveBreakpointHit.set(probeId, resolve) + }) + ] + } + + isReady () { + return this._readyPromise + } + + start () { + if (this.worker) return + + const { NODE_OPTIONS, ...envWithoutNodeOptions } = process.env + + log.debug('Starting Test Visibility - Dynamic Instrumentation client...') + + this.worker = new Worker( + join(__dirname, 'worker', 'index.js'), + { + execArgv: [], + env: envWithoutNodeOptions, + workerData: { + breakpointSetChannel: this.breakpointSetChannel.port1, + breakpointHitChannel: this.breakpointHitChannel.port1 + }, + transferList: [this.breakpointSetChannel.port1, this.breakpointHitChannel.port1] + } + ) + this.worker.on('online', () => { + log.debug('Test Visibility - Dynamic Instrumentation client is ready') + this._onReady() + }) + + // Allow the parent to exit even if the worker is still running + this.worker.unref() + + this.breakpointSetChannel.port2.on('message', (message) => { + const { probeId } = message + const resolve = probeIdToResolveBreakpointSet.get(probeId) + if (resolve) { + resolve() + probeIdToResolveBreakpointSet.delete(probeId) + } + }).unref() + + this.breakpointHitChannel.port2.on('message', (message) => { + const { snapshot } = message + const { probe: { id: probeId } } = snapshot + const resolve = probeIdToResolveBreakpointHit.get(probeId) + if (resolve) { + resolve({ snapshot }) + 
probeIdToResolveBreakpointHit.delete(probeId) + } + }).unref() + } +} + +module.exports = new TestVisDynamicInstrumentation() diff --git a/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/worker/index.js b/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/worker/index.js new file mode 100644 index 00000000000..4bef76e6343 --- /dev/null +++ b/packages/dd-trace/src/ci-visibility/dynamic-instrumentation/worker/index.js @@ -0,0 +1,90 @@ +'use strict' + +const { workerData: { breakpointSetChannel, breakpointHitChannel } } = require('worker_threads') +// TODO: move debugger/devtools_client/session to common place +const session = require('../../../debugger/devtools_client/session') +// TODO: move debugger/devtools_client/snapshot to common place +const { getLocalStateForCallFrame } = require('../../../debugger/devtools_client/snapshot') +// TODO: move debugger/devtools_client/state to common place +const { + findScriptFromPartialPath, + getStackFromCallFrames +} = require('../../../debugger/devtools_client/state') +const log = require('../../../log') + +let sessionStarted = false + +const breakpointIdToSnapshotId = new Map() +const breakpointIdToProbe = new Map() + +session.on('Debugger.paused', async ({ params: { hitBreakpoints: [hitBreakpoint], callFrames } }) => { + const probe = breakpointIdToProbe.get(hitBreakpoint) + if (!probe) { + log.warn(`No probe found for breakpoint ${hitBreakpoint}`) + return session.post('Debugger.resume') + } + + const stack = getStackFromCallFrames(callFrames) + + const getLocalState = await getLocalStateForCallFrame(callFrames[0]) + + await session.post('Debugger.resume') + + const snapshotId = breakpointIdToSnapshotId.get(hitBreakpoint) + + const snapshot = { + id: snapshotId, + timestamp: Date.now(), + probe: { + id: probe.probeId, + version: '0', + location: probe.location + }, + stack, + language: 'javascript' + } + + const state = getLocalState() + if (state) { + snapshot.captures = { + lines: { 
[probe.location.lines[0]]: { locals: state } } + } + } + + breakpointHitChannel.postMessage({ snapshot }) +}) + +// TODO: add option to remove breakpoint +breakpointSetChannel.on('message', async ({ snapshotId, probe: { id: probeId, file, line } }) => { + await addBreakpoint(snapshotId, { probeId, file, line }) + breakpointSetChannel.postMessage({ probeId }) +}) + +async function addBreakpoint (snapshotId, probe) { + if (!sessionStarted) await start() + const { file, line } = probe + + probe.location = { file, lines: [String(line)] } + + const script = findScriptFromPartialPath(file) + if (!script) throw new Error(`No loaded script found for ${file}`) + + const [path, scriptId] = script + + log.debug(`Adding breakpoint at ${path}:${line}`) + + const { breakpointId } = await session.post('Debugger.setBreakpoint', { + location: { + scriptId, + lineNumber: line - 1 + } + }) + + breakpointIdToProbe.set(breakpointId, probe) + breakpointIdToSnapshotId.set(breakpointId, snapshotId) +} + +function start () { + sessionStarted = true + return session.post('Debugger.enable') // return instead of await to reduce number of promises created +} diff --git a/packages/dd-trace/src/ci-visibility/exporters/agent-proxy/index.js b/packages/dd-trace/src/ci-visibility/exporters/agent-proxy/index.js index bb1367057f4..991031dd3e4 100644 --- a/packages/dd-trace/src/ci-visibility/exporters/agent-proxy/index.js +++ b/packages/dd-trace/src/ci-visibility/exporters/agent-proxy/index.js @@ -7,6 +7,7 @@ const CiVisibilityExporter = require('../ci-visibility-exporter') const AGENT_EVP_PROXY_PATH_PREFIX = '/evp_proxy/v' const AGENT_EVP_PROXY_PATH_REGEX = /\/evp_proxy\/v(\d+)\/?/ +const AGENT_DEBUGGER_INPUT = '/debugger/v1/input' function getLatestEvpProxyVersion (err, agentInfo) { if (err) { @@ -24,6 +25,10 @@ function getLatestEvpProxyVersion (err, agentInfo) { }, 0) } +function getCanForwardDebuggerLogs (err, agentInfo) { + return !err && agentInfo.endpoints.some(endpoint => endpoint === 
AGENT_DEBUGGER_INPUT) +} + class AgentProxyCiVisibilityExporter extends CiVisibilityExporter { constructor (config) { super(config) @@ -33,7 +38,8 @@ class AgentProxyCiVisibilityExporter extends CiVisibilityExporter { prioritySampler, lookup, protocolVersion, - headers + headers, + isTestDynamicInstrumentationEnabled } = config this.getAgentInfo((err, agentInfo) => { @@ -60,6 +66,18 @@ class AgentProxyCiVisibilityExporter extends CiVisibilityExporter { url: this._url, evpProxyPrefix }) + if (isTestDynamicInstrumentationEnabled) { + const canFowardLogs = getCanForwardDebuggerLogs(err, agentInfo) + if (canFowardLogs) { + const DynamicInstrumentationLogsWriter = require('../agentless/di-logs-writer') + this._logsWriter = new DynamicInstrumentationLogsWriter({ + url: this._url, + tags, + isAgentProxy: true + }) + this._canForwardLogs = true + } + } } else { this._writer = new AgentWriter({ url: this._url, diff --git a/packages/dd-trace/src/ci-visibility/exporters/agentless/di-logs-writer.js b/packages/dd-trace/src/ci-visibility/exporters/agentless/di-logs-writer.js new file mode 100644 index 00000000000..eebc3c5e6a9 --- /dev/null +++ b/packages/dd-trace/src/ci-visibility/exporters/agentless/di-logs-writer.js @@ -0,0 +1,53 @@ +'use strict' +const request = require('../../../exporters/common/request') +const log = require('../../../log') +const { safeJSONStringify } = require('../../../exporters/common/util') +const { JSONEncoder } = require('../../encode/json-encoder') + +const BaseWriter = require('../../../exporters/common/writer') + +// Writer used by the integration between Dynamic Instrumentation and Test Visibility +// It is used to encode and send logs to both the logs intake directly and the +// `/debugger/v1/input` endpoint in the agent, which is a proxy to the logs intake. 
+class DynamicInstrumentationLogsWriter extends BaseWriter { + constructor ({ url, timeout, isAgentProxy = false }) { + super(...arguments) + this._url = url + this._encoder = new JSONEncoder() + this._isAgentProxy = isAgentProxy + this.timeout = timeout + } + + _sendPayload (data, _, done) { + const options = { + path: '/api/v2/logs', + method: 'POST', + headers: { + 'dd-api-key': process.env.DATADOG_API_KEY || process.env.DD_API_KEY, + 'Content-Type': 'application/json' + }, + // TODO: what's a good value for timeout for the logs intake? + timeout: this.timeout || 15000, + url: this._url + } + + if (this._isAgentProxy) { + delete options.headers['dd-api-key'] + options.path = '/debugger/v1/input' + } + + log.debug(() => `Request to the logs intake: ${safeJSONStringify(options)}`) + + request(data, options, (err, res) => { + if (err) { + log.error(err) + done() + return + } + log.debug(`Response from the logs intake: ${res}`) + done() + }) + } +} + +module.exports = DynamicInstrumentationLogsWriter diff --git a/packages/dd-trace/src/ci-visibility/exporters/agentless/index.js b/packages/dd-trace/src/ci-visibility/exporters/agentless/index.js index dcbded6a54e..5895bb573cd 100644 --- a/packages/dd-trace/src/ci-visibility/exporters/agentless/index.js +++ b/packages/dd-trace/src/ci-visibility/exporters/agentless/index.js @@ -9,10 +9,11 @@ const log = require('../../../log') class AgentlessCiVisibilityExporter extends CiVisibilityExporter { constructor (config) { super(config) - const { tags, site, url } = config + const { tags, site, url, isTestDynamicInstrumentationEnabled } = config // we don't need to request /info because we are using agentless by configuration this._isInitialized = true this._resolveCanUseCiVisProtocol(true) + this._canForwardLogs = true this._url = url || new URL(`https://citestcycle-intake.${site}`) this._writer = new Writer({ url: this._url, tags }) @@ -20,6 +21,12 @@ class AgentlessCiVisibilityExporter extends CiVisibilityExporter { 
this._coverageUrl = url || new URL(`https://citestcov-intake.${site}`) this._coverageWriter = new CoverageWriter({ url: this._coverageUrl }) + if (isTestDynamicInstrumentationEnabled) { + const DynamicInstrumentationLogsWriter = require('./di-logs-writer') + this._logsUrl = url || new URL(`https://http-intake.logs.${site}`) + this._logsWriter = new DynamicInstrumentationLogsWriter({ url: this._logsUrl, tags }) + } + this._apiUrl = url || new URL(`https://api.${site}`) // Agentless is always gzip compatible this._isGzipCompatible = true diff --git a/packages/dd-trace/src/ci-visibility/exporters/ci-visibility-exporter.js b/packages/dd-trace/src/ci-visibility/exporters/ci-visibility-exporter.js index 9dabd34f7f3..f555603e0cb 100644 --- a/packages/dd-trace/src/ci-visibility/exporters/ci-visibility-exporter.js +++ b/packages/dd-trace/src/ci-visibility/exporters/ci-visibility-exporter.js @@ -8,6 +8,7 @@ const { getSkippableSuites: getSkippableSuitesRequest } = require('../intelligen const { getKnownTests: getKnownTestsRequest } = require('../early-flake-detection/get-known-tests') const log = require('../../log') const AgentInfoExporter = require('../../exporters/common/agent-info-exporter') +const { GIT_REPOSITORY_URL, GIT_COMMIT_SHA } = require('../../plugins/util/tags') function getTestConfigurationTags (tags) { if (!tags) { @@ -36,6 +37,7 @@ class CiVisibilityExporter extends AgentInfoExporter { super(config) this._timer = undefined this._coverageTimer = undefined + this._logsTimer = undefined this._coverageBuffer = [] // The library can use new features like ITR and test suite level visibility // AKA CI Vis Protocol @@ -255,6 +257,47 @@ class CiVisibilityExporter extends AgentInfoExporter { this._export(formattedCoverage, this._coverageWriter, '_coverageTimer') } + formatLogMessage (testConfiguration, logMessage) { + const { + [GIT_REPOSITORY_URL]: gitRepositoryUrl, + [GIT_COMMIT_SHA]: gitCommitSha + } = testConfiguration + + const { service, env, version } = 
this._config + + return { + ddtags: [ + ...(logMessage.ddtags || []), + `${GIT_REPOSITORY_URL}:${gitRepositoryUrl}`, + `${GIT_COMMIT_SHA}:${gitCommitSha}` + ].join(','), + level: 'error', + service, + dd: { + ...(logMessage.dd || []), + service, + env, + version + }, + ddsource: 'dd_debugger', + ...logMessage + } + } + + // DI logs + exportDiLogs (testConfiguration, logMessage) { + // TODO: could we lose logs if it's not initialized? + if (!this._config.isTestDynamicInstrumentationEnabled || !this._isInitialized || !this._canForwardLogs) { + return + } + + this._export( + this.formatLogMessage(testConfiguration, logMessage), + this._logsWriter, + '_logsTimer' + ) + } + flush (done = () => {}) { if (!this._isInitialized) { return done() diff --git a/packages/dd-trace/src/config.js b/packages/dd-trace/src/config.js index fa502ccb5a2..ec1df615627 100644 --- a/packages/dd-trace/src/config.js +++ b/packages/dd-trace/src/config.js @@ -462,6 +462,9 @@ class Config { this._setValue(defaults, 'appsec.stackTrace.maxDepth', 32) this._setValue(defaults, 'appsec.stackTrace.maxStackTraces', 2) this._setValue(defaults, 'appsec.wafTimeout', 5e3) // Β΅s + this._setValue(defaults, 'baggageMaxBytes', 8192) + this._setValue(defaults, 'baggageMaxItems', 64) + this._setValue(defaults, 'ciVisibilityTestSessionName', '') this._setValue(defaults, 'clientIpEnabled', false) this._setValue(defaults, 'clientIpHeader', null) this._setValue(defaults, 'codeOriginForSpans.enabled', false) @@ -501,10 +504,16 @@ class Config { this._setValue(defaults, 'isGitUploadEnabled', false) this._setValue(defaults, 'isIntelligentTestRunnerEnabled', false) this._setValue(defaults, 'isManualApiEnabled', false) + this._setValue(defaults, 'llmobs.agentlessEnabled', false) + this._setValue(defaults, 'llmobs.enabled', false) + this._setValue(defaults, 'llmobs.mlApp', undefined) this._setValue(defaults, 'ciVisibilityTestSessionName', '') this._setValue(defaults, 'ciVisAgentlessLogSubmissionEnabled', false) + 
this._setValue(defaults, 'legacyBaggageEnabled', true) + this._setValue(defaults, 'isTestDynamicInstrumentationEnabled', false) this._setValue(defaults, 'logInjection', false) this._setValue(defaults, 'lookup', undefined) + this._setValue(defaults, 'inferredProxyServicesEnabled', false) this._setValue(defaults, 'memcachedCommandEnabled', false) this._setValue(defaults, 'openAiLogsEnabled', false) this._setValue(defaults, 'openaiSpanCharLimit', 128) @@ -547,8 +556,8 @@ class Config { this._setValue(defaults, 'traceId128BitGenerationEnabled', true) this._setValue(defaults, 'traceId128BitLoggingEnabled', false) this._setValue(defaults, 'tracePropagationExtractFirst', false) - this._setValue(defaults, 'tracePropagationStyle.inject', ['datadog', 'tracecontext']) - this._setValue(defaults, 'tracePropagationStyle.extract', ['datadog', 'tracecontext']) + this._setValue(defaults, 'tracePropagationStyle.inject', ['datadog', 'tracecontext', 'baggage']) + this._setValue(defaults, 'tracePropagationStyle.extract', ['datadog', 'tracecontext', 'baggage']) this._setValue(defaults, 'tracePropagationStyle.otelPropagators', false) this._setValue(defaults, 'tracing', true) this._setValue(defaults, 'url', undefined) @@ -604,6 +613,9 @@ class Config { DD_INSTRUMENTATION_TELEMETRY_ENABLED, DD_INSTRUMENTATION_CONFIG_ID, DD_LOGS_INJECTION, + DD_LLMOBS_AGENTLESS_ENABLED, + DD_LLMOBS_ENABLED, + DD_LLMOBS_ML_APP, DD_OPENAI_LOGS_ENABLED, DD_OPENAI_SPAN_CHAR_LIMIT, DD_PROFILING_ENABLED, @@ -630,6 +642,8 @@ class Config { DD_TRACE_AGENT_HOSTNAME, DD_TRACE_AGENT_PORT, DD_TRACE_AGENT_PROTOCOL_VERSION, + DD_TRACE_BAGGAGE_MAX_BYTES, + DD_TRACE_BAGGAGE_MAX_ITEMS, DD_TRACE_CLIENT_IP_ENABLED, DD_TRACE_CLIENT_IP_HEADER, DD_TRACE_ENABLED, @@ -639,6 +653,7 @@ class Config { DD_TRACE_GIT_METADATA_ENABLED, DD_TRACE_GLOBAL_TAGS, DD_TRACE_HEADER_TAGS, + DD_TRACE_LEGACY_BAGGAGE_ENABLED, DD_TRACE_MEMCACHED_COMMAND_ENABLED, DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP, DD_TRACE_PARTIAL_FLUSH_MIN_SPANS, @@ -661,6 
+676,7 @@ class Config { DD_TRACE_X_DATADOG_TAGS_MAX_LENGTH, DD_TRACING_ENABLED, DD_VERSION, + DD_TRACE_INFERRED_PROXY_SERVICES_ENABLED, OTEL_METRICS_EXPORTER, OTEL_PROPAGATORS, OTEL_RESOURCE_ATTRIBUTES, @@ -710,6 +726,8 @@ class Config { this._envUnprocessed['appsec.stackTrace.maxStackTraces'] = DD_APPSEC_MAX_STACK_TRACES this._setValue(env, 'appsec.wafTimeout', maybeInt(DD_APPSEC_WAF_TIMEOUT)) this._envUnprocessed['appsec.wafTimeout'] = DD_APPSEC_WAF_TIMEOUT + this._setValue(env, 'baggageMaxBytes', DD_TRACE_BAGGAGE_MAX_BYTES) + this._setValue(env, 'baggageMaxItems', DD_TRACE_BAGGAGE_MAX_ITEMS) this._setBoolean(env, 'clientIpEnabled', DD_TRACE_CLIENT_IP_ENABLED) this._setString(env, 'clientIpHeader', DD_TRACE_CLIENT_IP_HEADER) this._setBoolean(env, 'codeOriginForSpans.enabled', DD_CODE_ORIGIN_FOR_SPANS_ENABLED) @@ -750,6 +768,10 @@ class Config { this._setArray(env, 'injectionEnabled', DD_INJECTION_ENABLED) this._setBoolean(env, 'isAzureFunction', getIsAzureFunction()) this._setBoolean(env, 'isGCPFunction', getIsGCPFunction()) + this._setBoolean(env, 'legacyBaggageEnabled', DD_TRACE_LEGACY_BAGGAGE_ENABLED) + this._setBoolean(env, 'llmobs.agentlessEnabled', DD_LLMOBS_AGENTLESS_ENABLED) + this._setBoolean(env, 'llmobs.enabled', DD_LLMOBS_ENABLED) + this._setString(env, 'llmobs.mlApp', DD_LLMOBS_ML_APP) this._setBoolean(env, 'logInjection', DD_LOGS_INJECTION) // Requires an accompanying DD_APM_OBFUSCATION_MEMCACHED_KEEP_COMMAND=true in the agent this._setBoolean(env, 'memcachedCommandEnabled', DD_TRACE_MEMCACHED_COMMAND_ENABLED) @@ -842,6 +864,7 @@ class Config { : !!OTEL_PROPAGATORS) this._setBoolean(env, 'tracing', DD_TRACING_ENABLED) this._setString(env, 'version', DD_VERSION || tags.version) + this._setBoolean(env, 'inferredProxyServicesEnabled', DD_TRACE_INFERRED_PROXY_SERVICES_ENABLED) } _applyOptions (options) { @@ -883,6 +906,8 @@ class Config { this._optsUnprocessed['appsec.wafTimeout'] = options.appsec.wafTimeout this._setBoolean(opts, 'clientIpEnabled', 
options.clientIpEnabled) this._setString(opts, 'clientIpHeader', options.clientIpHeader) + this._setValue(opts, 'baggageMaxBytes', options.baggageMaxBytes) + this._setValue(opts, 'baggageMaxItems', options.baggageMaxItems) this._setBoolean(opts, 'codeOriginForSpans.enabled', options.codeOriginForSpans?.enabled) this._setString(opts, 'dbmPropagationMode', options.dbmPropagationMode) if (options.dogstatsd) { @@ -920,6 +945,9 @@ class Config { } this._setString(opts, 'iast.telemetryVerbosity', options.iast && options.iast.telemetryVerbosity) this._setBoolean(opts, 'isCiVisibility', options.isCiVisibility) + this._setBoolean(opts, 'legacyBaggageEnabled', options.legacyBaggageEnabled) + this._setBoolean(opts, 'llmobs.agentlessEnabled', options.llmobs?.agentlessEnabled) + this._setString(opts, 'llmobs.mlApp', options.llmobs?.mlApp) this._setBoolean(opts, 'logInjection', options.logInjection) this._setString(opts, 'lookup', options.lookup) this._setBoolean(opts, 'openAiLogsEnabled', options.openAiLogsEnabled) @@ -955,6 +983,16 @@ class Config { this._setBoolean(opts, 'traceId128BitGenerationEnabled', options.traceId128BitGenerationEnabled) this._setBoolean(opts, 'traceId128BitLoggingEnabled', options.traceId128BitLoggingEnabled) this._setString(opts, 'version', options.version || tags.version) + this._setBoolean(opts, 'inferredProxyServicesEnabled', options.inferredProxyServicesEnabled) + + // For LLMObs, we want the environment variable to take precedence over the options. + // This is reliant on environment config being set before options. + // This is to make sure the origins of each value are tracked appropriately for telemetry. + // We'll only set `llmobs.enabled` on the opts when it's not set on the environment, and options.llmobs is provided. 
+ const llmobsEnabledEnv = this._env['llmobs.enabled'] + if (llmobsEnabledEnv == null && options.llmobs) { + this._setBoolean(opts, 'llmobs.enabled', !!options.llmobs) + } } _isCiVisibility () { @@ -1054,7 +1092,8 @@ class Config { DD_CIVISIBILITY_FLAKY_RETRY_ENABLED, DD_CIVISIBILITY_FLAKY_RETRY_COUNT, DD_TEST_SESSION_NAME, - DD_AGENTLESS_LOG_SUBMISSION_ENABLED + DD_AGENTLESS_LOG_SUBMISSION_ENABLED, + DD_TEST_DYNAMIC_INSTRUMENTATION_ENABLED } = process.env if (DD_CIVISIBILITY_AGENTLESS_URL) { @@ -1072,6 +1111,7 @@ class Config { this._setBoolean(calc, 'isManualApiEnabled', !isFalse(this._isCiVisibilityManualApiEnabled())) this._setString(calc, 'ciVisibilityTestSessionName', DD_TEST_SESSION_NAME) this._setBoolean(calc, 'ciVisAgentlessLogSubmissionEnabled', isTrue(DD_AGENTLESS_LOG_SUBMISSION_ENABLED)) + this._setBoolean(calc, 'isTestDynamicInstrumentationEnabled', isTrue(DD_TEST_DYNAMIC_INSTRUMENTATION_ENABLED)) } this._setString(calc, 'dogstatsd.hostname', this._getHostname()) this._setBoolean(calc, 'isGitUploadEnabled', diff --git a/packages/dd-trace/src/datastreams/pathway.js b/packages/dd-trace/src/datastreams/pathway.js index 066af789e64..ed2f6cc85f8 100644 --- a/packages/dd-trace/src/datastreams/pathway.js +++ b/packages/dd-trace/src/datastreams/pathway.js @@ -21,6 +21,7 @@ function shaHash (checkpointString) { } function computeHash (service, env, edgeTags, parentHash) { + edgeTags.sort() const hashableEdgeTags = edgeTags.filter(item => item !== 'manual_checkpoint:true') const key = `${service}${env}` + hashableEdgeTags.join('') + parentHash.toString() diff --git a/packages/dd-trace/src/debugger/devtools_client/index.js b/packages/dd-trace/src/debugger/devtools_client/index.js index 4675b61d725..db71e7028e7 100644 --- a/packages/dd-trace/src/debugger/devtools_client/index.js +++ b/packages/dd-trace/src/debugger/devtools_client/index.js @@ -5,7 +5,7 @@ const { breakpoints } = require('./state') const session = require('./session') const { 
getLocalStateForCallFrame } = require('./snapshot') const send = require('./send') -const { getScriptUrlFromId } = require('./state') +const { getStackFromCallFrames } = require('./state') const { ackEmitting, ackError } = require('./status') const { parentThreadId } = require('./config') const log = require('../../log') @@ -23,13 +23,14 @@ session.on('Debugger.paused', async ({ params }) => { const timestamp = Date.now() let captureSnapshotForProbe = null - let maxReferenceDepth, maxCollectionSize, maxLength + let maxReferenceDepth, maxCollectionSize, maxFieldCount, maxLength const probes = params.hitBreakpoints.map((id) => { const probe = breakpoints.get(id) if (probe.captureSnapshot) { captureSnapshotForProbe = probe maxReferenceDepth = highestOrUndefined(probe.capture.maxReferenceDepth, maxReferenceDepth) maxCollectionSize = highestOrUndefined(probe.capture.maxCollectionSize, maxCollectionSize) + maxFieldCount = highestOrUndefined(probe.capture.maxFieldCount, maxFieldCount) maxLength = highestOrUndefined(probe.capture.maxLength, maxLength) } return probe @@ -41,7 +42,7 @@ session.on('Debugger.paused', async ({ params }) => { // TODO: Create unique states for each affected probe based on that probes unique `capture` settings (DEBUG-2863) processLocalState = await getLocalStateForCallFrame( params.callFrames[0], - { maxReferenceDepth, maxCollectionSize, maxLength } + { maxReferenceDepth, maxCollectionSize, maxFieldCount, maxLength } ) } catch (err) { // TODO: This error is not tied to a specific probe, but to all probes with `captureSnapshot: true`. 
@@ -65,16 +66,7 @@ session.on('Debugger.paused', async ({ params }) => { thread_name: threadName } - const stack = params.callFrames.map((frame) => { - let fileName = getScriptUrlFromId(frame.location.scriptId) - if (fileName.startsWith('file://')) fileName = fileName.substr(7) // TODO: This might not be required - return { - fileName, - function: frame.functionName, - lineNumber: frame.location.lineNumber + 1, // Beware! lineNumber is zero-indexed - columnNumber: frame.location.columnNumber + 1 // Beware! columnNumber is zero-indexed - } - }) + const stack = getStackFromCallFrames(params.callFrames) // TODO: Send multiple probes in one HTTP request as an array (DEBUG-2848) for (const probe of probes) { diff --git a/packages/dd-trace/src/debugger/devtools_client/send.js b/packages/dd-trace/src/debugger/devtools_client/send.js index 593c3ea235d..f2ba5befd46 100644 --- a/packages/dd-trace/src/debugger/devtools_client/send.js +++ b/packages/dd-trace/src/debugger/devtools_client/send.js @@ -9,6 +9,8 @@ const { GIT_COMMIT_SHA, GIT_REPOSITORY_URL } = require('../../plugins/util/tags' module.exports = send +const MAX_PAYLOAD_SIZE = 1024 * 1024 // 1MB + const ddsource = 'dd_debugger' const hostname = getHostname() const service = config.service @@ -37,5 +39,17 @@ function send (message, logger, snapshot, cb) { 'debugger.snapshot': snapshot } - request(JSON.stringify(payload), opts, cb) + let json = JSON.stringify(payload) + + if (Buffer.byteLength(json) > MAX_PAYLOAD_SIZE) { + // TODO: This is a very crude way to handle large payloads. 
Proper pruning will be implemented later (DEBUG-2624) + const line = Object.values(payload['debugger.snapshot'].captures.lines)[0] + line.locals = { + notCapturedReason: 'Snapshot was too large', + size: Object.keys(line.locals).length + } + json = JSON.stringify(payload) + } + + request(json, opts, cb) } diff --git a/packages/dd-trace/src/debugger/devtools_client/snapshot/collector.js b/packages/dd-trace/src/debugger/devtools_client/snapshot/collector.js index 14f6db9727f..77f59173743 100644 --- a/packages/dd-trace/src/debugger/devtools_client/snapshot/collector.js +++ b/packages/dd-trace/src/debugger/devtools_client/snapshot/collector.js @@ -1,6 +1,6 @@ 'use strict' -const { collectionSizeSym } = require('./symbols') +const { collectionSizeSym, fieldCountSym } = require('./symbols') const session = require('../session') const LEAF_SUBTYPES = new Set(['date', 'regexp']) @@ -30,6 +30,11 @@ async function getObject (objectId, opts, depth = 0, collection = false) { result.splice(opts.maxCollectionSize) result[collectionSizeSym] = size } + } else if (result.length > opts.maxFieldCount) { + // Trim the number of properties on the object if there's too many. 
+ const size = result.length + result.splice(opts.maxFieldCount) + result[fieldCountSym] = size } else if (privateProperties) { result.push(...privateProperties) } diff --git a/packages/dd-trace/src/debugger/devtools_client/snapshot/index.js b/packages/dd-trace/src/debugger/devtools_client/snapshot/index.js index cca7aa43bae..6b66ec76766 100644 --- a/packages/dd-trace/src/debugger/devtools_client/snapshot/index.js +++ b/packages/dd-trace/src/debugger/devtools_client/snapshot/index.js @@ -5,6 +5,7 @@ const { processRawState } = require('./processor') const DEFAULT_MAX_REFERENCE_DEPTH = 3 const DEFAULT_MAX_COLLECTION_SIZE = 100 +const DEFAULT_MAX_FIELD_COUNT = 20 const DEFAULT_MAX_LENGTH = 255 module.exports = { @@ -16,6 +17,7 @@ async function getLocalStateForCallFrame ( { maxReferenceDepth = DEFAULT_MAX_REFERENCE_DEPTH, maxCollectionSize = DEFAULT_MAX_COLLECTION_SIZE, + maxFieldCount = DEFAULT_MAX_FIELD_COUNT, maxLength = DEFAULT_MAX_LENGTH } = {} ) { @@ -24,7 +26,10 @@ async function getLocalStateForCallFrame ( for (const scope of callFrame.scopeChain) { if (scope.type === 'global') continue // The global scope is too noisy - rawState.push(...await getRuntimeObject(scope.object.objectId, { maxReferenceDepth, maxCollectionSize })) + rawState.push(...await getRuntimeObject( + scope.object.objectId, + { maxReferenceDepth, maxCollectionSize, maxFieldCount } + )) } // Deplay calling `processRawState` so the caller gets a chance to resume the main thread before processing `rawState` diff --git a/packages/dd-trace/src/debugger/devtools_client/snapshot/processor.js b/packages/dd-trace/src/debugger/devtools_client/snapshot/processor.js index 9ded1477441..ea52939ab0e 100644 --- a/packages/dd-trace/src/debugger/devtools_client/snapshot/processor.js +++ b/packages/dd-trace/src/debugger/devtools_client/snapshot/processor.js @@ -1,6 +1,6 @@ 'use strict' -const { collectionSizeSym } = require('./symbols') +const { collectionSizeSym, fieldCountSym } = require('./symbols') 
module.exports = { processRawState: processProperties @@ -139,7 +139,18 @@ function toString (str, maxLength) { function toObject (type, props, maxLength) { if (props === undefined) return notCapturedDepth(type) - return { type, fields: processProperties(props, maxLength) } + + const result = { + type, + fields: processProperties(props, maxLength) + } + + if (fieldCountSym in props) { + result.notCapturedReason = 'fieldCount' + result.size = props[fieldCountSym] + } + + return result } function toArray (type, elements, maxLength) { diff --git a/packages/dd-trace/src/debugger/devtools_client/snapshot/symbols.js b/packages/dd-trace/src/debugger/devtools_client/snapshot/symbols.js index 99efc36e5f6..66a82d0a160 100644 --- a/packages/dd-trace/src/debugger/devtools_client/snapshot/symbols.js +++ b/packages/dd-trace/src/debugger/devtools_client/snapshot/symbols.js @@ -1,5 +1,6 @@ 'use stict' module.exports = { - collectionSizeSym: Symbol('datadog.collectionSize') + collectionSizeSym: Symbol('datadog.collectionSize'), + fieldCountSym: Symbol('datadog.fieldCount') } diff --git a/packages/dd-trace/src/debugger/devtools_client/state.js b/packages/dd-trace/src/debugger/devtools_client/state.js index 8be9c808369..c409a69f6b7 100644 --- a/packages/dd-trace/src/debugger/devtools_client/state.js +++ b/packages/dd-trace/src/debugger/devtools_client/state.js @@ -32,8 +32,17 @@ module.exports = { .sort(([a], [b]) => a.length - b.length)[0] }, - getScriptUrlFromId (id) { - return scriptUrls.get(id) + getStackFromCallFrames (callFrames) { + return callFrames.map((frame) => { + let fileName = scriptUrls.get(frame.location.scriptId) + if (fileName.startsWith('file://')) fileName = fileName.substr(7) // TODO: This might not be required + return { + fileName, + function: frame.functionName, + lineNumber: frame.location.lineNumber + 1, // Beware! lineNumber is zero-indexed + columnNumber: frame.location.columnNumber + 1 // Beware! 
columnNumber is zero-indexed + } + }) } } diff --git a/packages/dd-trace/src/debugger/index.js b/packages/dd-trace/src/debugger/index.js index 5db1a440cf2..3638119c6f1 100644 --- a/packages/dd-trace/src/debugger/index.js +++ b/packages/dd-trace/src/debugger/index.js @@ -6,6 +6,7 @@ const log = require('../log') let worker = null let configChannel = null +let ackId = 0 const { NODE_OPTIONS, ...env } = process.env @@ -24,13 +25,19 @@ function start (config, rc) { configChannel = new MessageChannel() rc.setProductHandler('LIVE_DEBUGGING', (action, conf, id, ack) => { - const ackId = `${id}-${conf.version}` - rcAckCallbacks.set(ackId, ack) + rcAckCallbacks.set(++ackId, ack) rcChannel.port2.postMessage({ action, conf, ackId }) }) rcChannel.port2.on('message', ({ ackId, error }) => { - rcAckCallbacks.get(ackId)(error) + const ack = rcAckCallbacks.get(ackId) + if (ack === undefined) { + // This should never happen, but just in case something changes in the future, we should guard against it + log.error(`Received an unknown ackId: ${ackId}`) + if (error) log.error(error) + return + } + ack(error) rcAckCallbacks.delete(ackId) }) rcChannel.port2.on('messageerror', (err) => log.error(err)) diff --git a/packages/dd-trace/src/llmobs/constants/tags.js b/packages/dd-trace/src/llmobs/constants/tags.js new file mode 100644 index 00000000000..eee9a6b9890 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/tags.js @@ -0,0 +1,34 @@ +'use strict' + +module.exports = { + SPAN_KINDS: ['llm', 'agent', 'workflow', 'task', 'tool', 'embedding', 'retrieval'], + SPAN_KIND: '_ml_obs.meta.span.kind', + SESSION_ID: '_ml_obs.session_id', + METADATA: '_ml_obs.meta.metadata', + METRICS: '_ml_obs.metrics', + ML_APP: '_ml_obs.meta.ml_app', + PROPAGATED_PARENT_ID_KEY: '_dd.p.llmobs_parent_id', + PARENT_ID_KEY: '_ml_obs.llmobs_parent_id', + TAGS: '_ml_obs.tags', + NAME: '_ml_obs.name', + TRACE_ID: '_ml_obs.trace_id', + PROPAGATED_TRACE_ID_KEY: '_dd.p.llmobs_trace_id', + ROOT_PARENT_ID: 
'undefined', + + MODEL_NAME: '_ml_obs.meta.model_name', + MODEL_PROVIDER: '_ml_obs.meta.model_provider', + + INPUT_DOCUMENTS: '_ml_obs.meta.input.documents', + INPUT_MESSAGES: '_ml_obs.meta.input.messages', + INPUT_VALUE: '_ml_obs.meta.input.value', + + OUTPUT_DOCUMENTS: '_ml_obs.meta.output.documents', + OUTPUT_MESSAGES: '_ml_obs.meta.output.messages', + OUTPUT_VALUE: '_ml_obs.meta.output.value', + + INPUT_TOKENS_METRIC_KEY: 'input_tokens', + OUTPUT_TOKENS_METRIC_KEY: 'output_tokens', + TOTAL_TOKENS_METRIC_KEY: 'total_tokens', + + DROPPED_IO_COLLECTION_ERROR: 'dropped_io' +} diff --git a/packages/dd-trace/src/llmobs/constants/text.js b/packages/dd-trace/src/llmobs/constants/text.js new file mode 100644 index 00000000000..3c19b9febb6 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/text.js @@ -0,0 +1,6 @@ +'use strict' + +module.exports = { + DROPPED_VALUE_TEXT: "[This value has been dropped because this span's size exceeds the 1MB size limit.]", + UNSERIALIZABLE_VALUE_TEXT: 'Unserializable value' +} diff --git a/packages/dd-trace/src/llmobs/constants/writers.js b/packages/dd-trace/src/llmobs/constants/writers.js new file mode 100644 index 00000000000..3726c33c7c0 --- /dev/null +++ b/packages/dd-trace/src/llmobs/constants/writers.js @@ -0,0 +1,13 @@ +'use strict' + +module.exports = { + EVP_PROXY_AGENT_BASE_PATH: 'evp_proxy/v2', + EVP_PROXY_AGENT_ENDPOINT: 'evp_proxy/v2/api/v2/llmobs', + EVP_SUBDOMAIN_HEADER_NAME: 'X-Datadog-EVP-Subdomain', + EVP_SUBDOMAIN_HEADER_VALUE: 'llmobs-intake', + AGENTLESS_SPANS_ENDPOINT: '/api/v2/llmobs', + AGENTLESS_EVALULATIONS_ENDPOINT: '/api/intake/llm-obs/v1/eval-metric', + + EVP_PAYLOAD_SIZE_LIMIT: 5 << 20, // 5MB (actual limit is 5.1MB) + EVP_EVENT_SIZE_LIMIT: (1 << 20) - 1024 // 999KB (actual limit is 1MB) +} diff --git a/packages/dd-trace/src/llmobs/index.js b/packages/dd-trace/src/llmobs/index.js new file mode 100644 index 00000000000..5d33ecb4c5d --- /dev/null +++ b/packages/dd-trace/src/llmobs/index.js @@ -0,0 +1,103 
@@ +'use strict' + +const log = require('../log') +const { PROPAGATED_PARENT_ID_KEY } = require('./constants/tags') +const { storage } = require('./storage') + +const LLMObsSpanProcessor = require('./span_processor') + +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = channel('llmobs:writers:flush') +const injectCh = channel('dd-trace:span:inject') + +const LLMObsAgentlessSpanWriter = require('./writers/spans/agentless') +const LLMObsAgentProxySpanWriter = require('./writers/spans/agentProxy') +const LLMObsEvalMetricsWriter = require('./writers/evaluations') + +/** + * Setting writers and processor globally when LLMObs is enabled + * We're setting these in this module instead of on the SDK. + * This is to isolate any subscribers and periodic tasks to this module, + * and not conditionally instantiate in the SDK, since the SDK is always instantiated + * if the tracer is `init`ed. But, in those cases, we don't want to start writers or subscribe + * to channels. 
+ */ +let spanProcessor +let spanWriter +let evalWriter + +function enable (config) { + // create writers and eval writer append and flush channels + // span writer append is handled by the span processor + evalWriter = new LLMObsEvalMetricsWriter(config) + spanWriter = createSpanWriter(config) + + evalMetricAppendCh.subscribe(handleEvalMetricAppend) + flushCh.subscribe(handleFlush) + + // span processing + spanProcessor = new LLMObsSpanProcessor(config) + spanProcessor.setWriter(spanWriter) + spanProcessCh.subscribe(handleSpanProcess) + + // distributed tracing for llmobs + injectCh.subscribe(handleLLMObsParentIdInjection) +} + +function disable () { + if (evalMetricAppendCh.hasSubscribers) evalMetricAppendCh.unsubscribe(handleEvalMetricAppend) + if (flushCh.hasSubscribers) flushCh.unsubscribe(handleFlush) + if (spanProcessCh.hasSubscribers) spanProcessCh.unsubscribe(handleSpanProcess) + if (injectCh.hasSubscribers) injectCh.unsubscribe(handleLLMObsParentIdInjection) + + spanWriter?.destroy() + evalWriter?.destroy() + spanProcessor?.setWriter(null) + + spanWriter = null + evalWriter = null +} + +// LLMObs traces can extend between services and still be the same trace, so the parent id +// is propagated by appending it to `x-datadog-tags` (without assuming the header is already set). +function handleLLMObsParentIdInjection ({ carrier }) { + const parent = storage.getStore()?.span + if (!parent) return + + const parentId = parent?.context().toSpanId() + const prefix = carrier['x-datadog-tags'] ? `${carrier['x-datadog-tags']},` : '' // avoids "undefined,..." + carrier['x-datadog-tags'] = `${prefix}${PROPAGATED_PARENT_ID_KEY}=${parentId}` +} + +function createSpanWriter (config) { + const SpanWriter = config.llmobs.agentlessEnabled ?
LLMObsAgentlessSpanWriter : LLMObsAgentProxySpanWriter + return new SpanWriter(config) +} + +function handleFlush () { + try { + spanWriter.flush() + evalWriter.flush() + } catch (e) { + log.warn(`Failed to flush LLMObs spans and evaluation metrics: ${e.message}`) + } +} + +function handleSpanProcess (data) { + spanProcessor.process(data) +} + +function handleEvalMetricAppend (payload) { + try { + evalWriter.append(payload) + } catch (e) { + log.warn(` + Failed to append evaluation metric to LLM Observability writer, likely due to an unserializable property. + Evaluation metrics won't be sent to LLM Observability: ${e.message} + `) + } +} + +module.exports = { enable, disable } diff --git a/packages/dd-trace/src/llmobs/noop.js b/packages/dd-trace/src/llmobs/noop.js new file mode 100644 index 00000000000..4eba48cd51c --- /dev/null +++ b/packages/dd-trace/src/llmobs/noop.js @@ -0,0 +1,82 @@ +'use strict' + +class NoopLLMObs { + constructor (noopTracer) { + this._tracer = noopTracer + } + + get enabled () { + return false + } + + enable (options) {} + + disable () {} + + trace (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const name = options.name || options.kind || fn.name + + return this._tracer.trace(name, options, fn) + } + + wrap (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const name = options.name || options.kind || fn.name + + return this._tracer.wrap(name, options, fn) + } + + decorate (options = {}) { + const llmobs = this + return function (target, ctxOrPropertyKey, descriptor) { + if (!ctxOrPropertyKey) return target + if (typeof ctxOrPropertyKey === 'object') { + const ctx = ctxOrPropertyKey + if (ctx.kind !== 'method') return target + + return llmobs.wrap({ name: ctx.name, ...options }, target) + } else { + const propertyKey = ctxOrPropertyKey + if (descriptor) { + if (typeof descriptor.value !== 'function') return descriptor + + const original = 
descriptor.value + descriptor.value = llmobs.wrap({ name: propertyKey, ...options }, original) + + return descriptor + } else { + if (typeof target[propertyKey] !== 'function') return target[propertyKey] + + const original = target[propertyKey] + Object.defineProperty(target, propertyKey, { + ...Object.getOwnPropertyDescriptor(target, propertyKey), + value: llmobs.wrap({ name: propertyKey, ...options }, original) + }) + + return target + } + } + } + } + + annotate (span, options) {} + + exportSpan (span) { + return {} + } + + submitEvaluation (llmobsSpanContext, options) {} + + flush () {} +} + +module.exports = NoopLLMObs diff --git a/packages/dd-trace/src/llmobs/plugins/base.js b/packages/dd-trace/src/llmobs/plugins/base.js new file mode 100644 index 00000000000..f7f4d2b5e94 --- /dev/null +++ b/packages/dd-trace/src/llmobs/plugins/base.js @@ -0,0 +1,65 @@ +'use strict' + +const log = require('../../log') +const { storage } = require('../storage') + +const TracingPlugin = require('../../plugins/tracing') +const LLMObsTagger = require('../tagger') + +// we make this a `Plugin` so we don't have to worry about `finish` being called +class LLMObsPlugin extends TracingPlugin { + constructor (...args) { + super(...args) + + this._tagger = new LLMObsTagger(this._tracerConfig, true) + } + + getName () {} + + setLLMObsTags (ctx) { + throw new Error('setLLMObsTags must be implemented by the subclass') + } + + getLLMObsSPanRegisterOptions (ctx) { + throw new Error('getLLMObsSPanRegisterOptions must be implemented by the subclass') + } + + start (ctx) { + const oldStore = storage.getStore() + const parent = oldStore?.span + const span = ctx.currentStore?.span + + const registerOptions = this.getLLMObsSPanRegisterOptions(ctx) + + this._tagger.registerLLMObsSpan(span, { parent, ...registerOptions }) + } + + asyncEnd (ctx) { + // even though llmobs span events won't be enqueued if llmobs is disabled + // we should avoid doing any computations here (these listeners aren't 
disabled) + const enabled = this._tracerConfig.llmobs.enabled + if (!enabled) return + + const span = ctx.currentStore?.span + if (!span) { + log.debug( + `Tried to start an LLMObs span for ${this.constructor.name} without an active APM span. + Not starting LLMObs span.` + ) + return + } + + this.setLLMObsTags(ctx) + } + + configure (config) { + // we do not want to enable any LLMObs plugins if it is disabled on the tracer + const llmobsEnabled = this._tracerConfig.llmobs.enabled + if (llmobsEnabled === false) { + config = typeof config === 'boolean' ? false : { ...config, enabled: false } // override to false + } + super.configure(config) + } +} + +module.exports = LLMObsPlugin diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js new file mode 100644 index 00000000000..431760a04f8 --- /dev/null +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -0,0 +1,205 @@ +'use strict' + +const LLMObsPlugin = require('./base') + +class OpenAiLLMObsPlugin extends LLMObsPlugin { + static get prefix () { + return 'tracing:apm:openai:request' + } + + getLLMObsSPanRegisterOptions (ctx) { + const resource = ctx.methodName + const methodName = gateResource(normalizeOpenAIResourceName(resource)) + if (!methodName) return // we will not trace all openai methods for llmobs + + const inputs = ctx.args[0] // completion, chat completion, and embeddings take one argument + const operation = getOperation(methodName) + const kind = operation === 'embedding' ? 
'embedding' : 'llm' + const name = `openai.${methodName}` + + return { + modelProvider: 'openai', + modelName: inputs.model, + kind, + name + } + } + + setLLMObsTags (ctx) { + const span = ctx.currentStore?.span + const resource = ctx.methodName + const methodName = gateResource(normalizeOpenAIResourceName(resource)) + if (!methodName) return // we will not trace all openai methods for llmobs + + const inputs = ctx.args[0] // completion, chat completion, and embeddings take one argument + const response = ctx.result?.data // no result if error + const error = !!span.context()._tags.error + + const operation = getOperation(methodName) + + if (operation === 'completion') { + this._tagCompletion(span, inputs, response, error) + } else if (operation === 'chat') { + this._tagChatCompletion(span, inputs, response, error) + } else if (operation === 'embedding') { + this._tagEmbedding(span, inputs, response, error) + } + + if (!error) { + const metrics = this._extractMetrics(response) + this._tagger.tagMetrics(span, metrics) + } + } + + _extractMetrics (response) { + const metrics = {} + const tokenUsage = response.usage + + if (tokenUsage) { + const inputTokens = tokenUsage.prompt_tokens + if (inputTokens) metrics.inputTokens = inputTokens + + const outputTokens = tokenUsage.completion_tokens + if (outputTokens) metrics.outputTokens = outputTokens + + const totalTokens = tokenUsage.total_tokens || (inputTokens + outputTokens) // fall back to the sum + if (totalTokens) metrics.totalTokens = totalTokens + } + + return metrics + } + + _tagEmbedding (span, inputs, response, error) { + const { model, ...parameters } = inputs + + const metadata = { + encoding_format: parameters.encoding_format || 'float' + } + if (inputs.dimensions) metadata.dimensions = inputs.dimensions + this._tagger.tagMetadata(span, metadata) + + let embeddingInputs = inputs.input + if (!Array.isArray(embeddingInputs)) embeddingInputs = [embeddingInputs] + const embeddingInput = embeddingInputs.map(input => ({ text: input })) + +
if (error) { + this._tagger.tagEmbeddingIO(span, embeddingInput, undefined) + return + } + + const float = Array.isArray(response.data[0].embedding) + let embeddingOutput + if (float) { + const embeddingDim = response.data[0].embedding.length + embeddingOutput = `[${response.data.length} embedding(s) returned with size ${embeddingDim}]` + } else { + embeddingOutput = `[${response.data.length} embedding(s) returned]` + } + + this._tagger.tagEmbeddingIO(span, embeddingInput, embeddingOutput) + } + + _tagCompletion (span, inputs, response, error) { + let { prompt, model, ...parameters } = inputs + if (!Array.isArray(prompt)) prompt = [prompt] + + const completionInput = prompt.map(p => ({ content: p })) + + const completionOutput = error ? [{ content: '' }] : response.choices.map(choice => ({ content: choice.text })) + + this._tagger.tagLLMIO(span, completionInput, completionOutput) + this._tagger.tagMetadata(span, parameters) + } + + _tagChatCompletion (span, inputs, response, error) { + const { messages, model, ...parameters } = inputs + + if (error) { + this._tagger.tagLLMIO(span, messages, [{ content: '' }]) + return + } + + const outputMessages = [] + const { choices } = response + for (const choice of choices) { + const message = choice.message || choice.delta + const content = message.content || '' + const role = message.role + + if (message.function_call) { + const functionCallInfo = { + name: message.function_call.name, + arguments: JSON.parse(message.function_call.arguments) + } + outputMessages.push({ content, role, toolCalls: [functionCallInfo] }) + } else if (message.tool_calls) { + const toolCallsInfo = [] + for (const toolCall of message.tool_calls) { + const toolCallInfo = { + arguments: JSON.parse(toolCall.function.arguments), + name: toolCall.function.name, + toolId: toolCall.id, + type: toolCall.type + } + toolCallsInfo.push(toolCallInfo) + } + outputMessages.push({ content, role, toolCalls: toolCallsInfo }) + } else { + outputMessages.push({ 
content, role }) + } + } + + this._tagger.tagLLMIO(span, messages, outputMessages) + + const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { + if (!['tools', 'functions'].includes(key)) { + obj[key] = value + } + + return obj + }, {}) + + this._tagger.tagMetadata(span, metadata) + } +} + +// TODO: this will be moved to the APM integration +function normalizeOpenAIResourceName (resource) { + switch (resource) { + // completions + case 'completions.create': + return 'createCompletion' + + // chat completions + case 'chat.completions.create': + return 'createChatCompletion' + + // embeddings + case 'embeddings.create': + return 'createEmbedding' + default: + return resource + } +} + +function gateResource (resource) { + return ['createCompletion', 'createChatCompletion', 'createEmbedding'].includes(resource) + ? resource + : undefined +} + +function getOperation (resource) { + switch (resource) { + case 'createCompletion': + return 'completion' + case 'createChatCompletion': + return 'chat' + case 'createEmbedding': + return 'embedding' + default: + // should never happen + return 'unknown' + } +} + +module.exports = OpenAiLLMObsPlugin diff --git a/packages/dd-trace/src/llmobs/sdk.js b/packages/dd-trace/src/llmobs/sdk.js new file mode 100644 index 00000000000..5717a8a0f19 --- /dev/null +++ b/packages/dd-trace/src/llmobs/sdk.js @@ -0,0 +1,377 @@ +'use strict' + +const { SPAN_KIND, OUTPUT_VALUE } = require('./constants/tags') + +const { + getFunctionArguments, + validateKind +} = require('./util') +const { isTrue } = require('../util') + +const { storage } = require('./storage') + +const Span = require('../opentracing/span') + +const tracerVersion = require('../../../../package.json').version +const logger = require('../log') + +const LLMObsTagger = require('./tagger') + +// communicating with writer +const { channel } = require('dc-polyfill') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = 
channel('llmobs:writers:flush') +const NoopLLMObs = require('./noop') + +class LLMObs extends NoopLLMObs { + constructor (tracer, llmobsModule, config) { + super(tracer) + + this._config = config + this._llmobsModule = llmobsModule + this._tagger = new LLMObsTagger(config) + } + + get enabled () { + return this._config.llmobs.enabled + } + + enable (options = {}) { + if (this.enabled) { + logger.debug('LLMObs is already enabled.') + return + } + + logger.debug('Enabling LLMObs') + + const { mlApp, agentlessEnabled } = options + + const { DD_LLMOBS_ENABLED } = process.env + + const llmobsConfig = { + mlApp, + agentlessEnabled + } + + const enabled = DD_LLMOBS_ENABLED == null || isTrue(DD_LLMOBS_ENABLED) + if (!enabled) { + logger.debug('LLMObs.enable() called when DD_LLMOBS_ENABLED is false. No action taken.') + return + } + + this._config.llmobs.enabled = true + this._config.configure({ ...this._config, llmobs: llmobsConfig }) + + // configure writers and channel subscribers + this._llmobsModule.enable(this._config) + } + + disable () { + if (!this.enabled) { + logger.debug('LLMObs is already disabled.') + return + } + + logger.debug('Disabling LLMObs') + + this._config.llmobs.enabled = false + + // disable writers and channel subscribers + this._llmobsModule.disable() + } + + trace (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const kind = validateKind(options.kind) // will throw if kind is undefined or not an expected kind + + // name is required for spans generated with `trace` + // while `kind` is required, this should never throw (as otherwise it would have thrown above) + const name = options.name || kind + if (!name) { + throw new Error('No span name provided for `trace`.') + } + + const { + spanOptions, + ...llmobsOptions + } = this._extractOptions(options) + + if (fn.length > 1) { + return this._tracer.trace(name, spanOptions, (span, cb) => + this._activate(span, { kind, options: llmobsOptions }, () => 
fn(span, cb)) + ) + } + + return this._tracer.trace(name, spanOptions, span => + this._activate(span, { kind, options: llmobsOptions }, () => fn(span)) + ) + } + + wrap (options = {}, fn) { + if (typeof options === 'function') { + fn = options + options = {} + } + + const kind = validateKind(options.kind) // will throw if kind is undefined or not an expected kind + let name = options.name || (fn?.name ? fn.name : undefined) || kind + + if (!name) { + logger.warn('No span name provided for `wrap`. Defaulting to "unnamed-anonymous-function".') + name = 'unnamed-anonymous-function' + } + + const { + spanOptions, + ...llmobsOptions + } = this._extractOptions(options) + + const llmobs = this + + function wrapped () { + const span = llmobs._tracer.scope().active() + + const result = llmobs._activate(span, { kind, options: llmobsOptions }, () => { + if (!['llm', 'embedding'].includes(kind)) { + llmobs.annotate(span, { inputData: getFunctionArguments(fn, arguments) }) + } + + return fn.apply(this, arguments) + }) + + if (result && typeof result.then === 'function') { + return result.then(value => { + if (value && !['llm', 'retrieval'].includes(kind) && !LLMObsTagger.tagMap.get(span)?.[OUTPUT_VALUE]) { + llmobs.annotate(span, { outputData: value }) + } + return value + }) + } + + if (result && !['llm', 'retrieval'].includes(kind) && !LLMObsTagger.tagMap.get(span)?.[OUTPUT_VALUE]) { + llmobs.annotate(span, { outputData: result }) + } + + return result + } + + return this._tracer.wrap(name, spanOptions, wrapped) + } + + annotate (span, options) { + if (!this.enabled) return + + if (!span) { + span = this._active() + } + + if ((span && !options) && !(span instanceof Span)) { + options = span + span = this._active() + } + + if (!span) { + throw new Error('No span provided and no active LLMObs-generated span found') + } + if (!options) { + throw new Error('No options provided for annotation.') + } + + if (!LLMObsTagger.tagMap.has(span)) { + throw new Error('Span must be an 
LLMObs-generated span') + } + if (span._duration !== undefined) { + throw new Error('Cannot annotate a finished span') + } + + const spanKind = LLMObsTagger.tagMap.get(span)[SPAN_KIND] + if (!spanKind) { + throw new Error('LLMObs span must have a span kind specified') + } + + const { inputData, outputData, metadata, metrics, tags } = options + + if (inputData || outputData) { + if (spanKind === 'llm') { + this._tagger.tagLLMIO(span, inputData, outputData) + } else if (spanKind === 'embedding') { + this._tagger.tagEmbeddingIO(span, inputData, outputData) + } else if (spanKind === 'retrieval') { + this._tagger.tagRetrievalIO(span, inputData, outputData) + } else { + this._tagger.tagTextIO(span, inputData, outputData) + } + } + + if (metadata) { + this._tagger.tagMetadata(span, metadata) + } + + if (metrics) { + this._tagger.tagMetrics(span, metrics) + } + + if (tags) { + this._tagger.tagSpanTags(span, tags) + } + } + + exportSpan (span) { + span = span || this._active() + + if (!span) { + throw new Error('No span provided and no active LLMObs-generated span found') + } + + if (!(span instanceof Span)) { + throw new Error('Span must be a valid Span object.') + } + + if (!LLMObsTagger.tagMap.has(span)) { + throw new Error('Span must be an LLMObs-generated span') + } + + try { + return { + traceId: span.context().toTraceId(true), + spanId: span.context().toSpanId() + } + } catch { + logger.warn('Faild to export span. Span must be a valid Span object.') + } + } + + submitEvaluation (llmobsSpanContext, options = {}) { + if (!this.enabled) return + + if (!this._config.apiKey) { + throw new Error( + 'DD_API_KEY is required for sending evaluation metrics. Evaluation metric data will not be sent.\n' + + 'Ensure this configuration is set before running your application.' + ) + } + + const { traceId, spanId } = llmobsSpanContext + if (!traceId || !spanId) { + throw new Error( + 'spanId and traceId must both be specified for the given evaluation metric to be submitted.' 
+ ) + } + + const mlApp = options.mlApp || this._config.llmobs.mlApp + if (!mlApp) { + throw new Error( + 'ML App name is required for sending evaluation metrics. Evaluation metric data will not be sent.' + ) + } + + const timestampMs = options.timestampMs || Date.now() + if (typeof timestampMs !== 'number' || timestampMs < 0) { + throw new Error('timestampMs must be a non-negative integer. Evaluation metric data will not be sent') + } + + const { label, value, tags } = options + const metricType = options.metricType?.toLowerCase() + if (!label) { + throw new Error('label must be the specified name of the evaluation metric') + } + if (!metricType || !['categorical', 'score'].includes(metricType)) { + throw new Error('metricType must be one of "categorical" or "score"') + } + + if (metricType === 'categorical' && typeof value !== 'string') { + throw new Error('value must be a string for a categorical metric.') + } + if (metricType === 'score' && typeof value !== 'number') { + throw new Error('value must be a number for a score metric.') + } + + const evaluationTags = { + 'dd-trace.version': tracerVersion, + ml_app: mlApp + } + + if (tags) { + for (const key in tags) { + const tag = tags[key] + if (typeof tag === 'string') { + evaluationTags[key] = tag + } else if (typeof tag.toString === 'function') { + evaluationTags[key] = tag.toString() + } else if (tag == null) { + evaluationTags[key] = Object.prototype.toString.call(tag) + } else { + // should be a rare case + // every object in JS has a toString, otherwise every primitive has its own toString + // null and undefined are handled above + throw new Error('Failed to parse tags. 
Tags for evaluation metrics must be strings') + } + } + } + + const payload = { + span_id: spanId, + trace_id: traceId, + label, + metric_type: metricType, + ml_app: mlApp, + [`${metricType}_value`]: value, + timestamp_ms: timestampMs, + tags: Object.entries(evaluationTags).map(([key, value]) => `${key}:${value}`) + } + + evalMetricAppendCh.publish(payload) + } + + flush () { + if (!this.enabled) return + + flushCh.publish() + } + + _active () { + const store = storage.getStore() + return store?.span + } + + _activate (span, { kind, options } = {}, fn) { + const parent = this._active() + if (this.enabled) storage.enterWith({ span }) + + this._tagger.registerLLMObsSpan(span, { + ...options, + parent, + kind + }) + + try { + return fn() + } finally { + if (this.enabled) storage.enterWith({ span: parent }) + } + } + + _extractOptions (options) { + const { + modelName, + modelProvider, + sessionId, + mlApp, + ...spanOptions + } = options + + return { + mlApp, + modelName, + modelProvider, + sessionId, + spanOptions + } + } +} + +module.exports = LLMObs diff --git a/packages/dd-trace/src/llmobs/span_processor.js b/packages/dd-trace/src/llmobs/span_processor.js new file mode 100644 index 00000000000..bc8eeda06b7 --- /dev/null +++ b/packages/dd-trace/src/llmobs/span_processor.js @@ -0,0 +1,195 @@ +'use strict' + +const { + SPAN_KIND, + MODEL_NAME, + MODEL_PROVIDER, + METADATA, + INPUT_MESSAGES, + INPUT_VALUE, + OUTPUT_MESSAGES, + INPUT_DOCUMENTS, + OUTPUT_DOCUMENTS, + OUTPUT_VALUE, + METRICS, + ML_APP, + TAGS, + PARENT_ID_KEY, + SESSION_ID, + NAME +} = require('./constants/tags') +const { UNSERIALIZABLE_VALUE_TEXT } = require('./constants/text') + +const { + ERROR_MESSAGE, + ERROR_TYPE, + ERROR_STACK +} = require('../constants') + +const LLMObsTagger = require('./tagger') + +const tracerVersion = require('../../../../package.json').version +const logger = require('../log') + +class LLMObsSpanProcessor { + constructor (config) { + this._config = config + } + + setWriter 
(writer) { + this._writer = writer + } + + // TODO: instead of relying on the tagger's weakmap registry, can we use some namespaced storage correlation? + process ({ span }) { + if (!this._config.llmobs.enabled) return + // if the span is not in our private tagger map, it is not an llmobs span + if (!LLMObsTagger.tagMap.has(span)) return + + try { + const formattedEvent = this.format(span) + this._writer.append(formattedEvent) + } catch (e) { + // this should be a rare case + // we protect against unserializable properties in the format function, and in + // safeguards in the tagger + logger.warn(` + Failed to append span to LLM Observability writer, likely due to an unserializable property. + Span won't be sent to LLM Observability: ${e.message} + `) + } + } + + format (span) { + const spanTags = span.context()._tags + const mlObsTags = LLMObsTagger.tagMap.get(span) + + const spanKind = mlObsTags[SPAN_KIND] + + const meta = { 'span.kind': spanKind, input: {}, output: {} } + const input = {} + const output = {} + + if (['llm', 'embedding'].includes(spanKind)) { + meta.model_name = mlObsTags[MODEL_NAME] || 'custom' + meta.model_provider = (mlObsTags[MODEL_PROVIDER] || 'custom').toLowerCase() + } + if (mlObsTags[METADATA]) { + this._addObject(mlObsTags[METADATA], meta.metadata = {}) + } + if (spanKind === 'llm' && mlObsTags[INPUT_MESSAGES]) { + input.messages = mlObsTags[INPUT_MESSAGES] + } + if (mlObsTags[INPUT_VALUE]) { + input.value = mlObsTags[INPUT_VALUE] + } + if (spanKind === 'llm' && mlObsTags[OUTPUT_MESSAGES]) { + output.messages = mlObsTags[OUTPUT_MESSAGES] + } + if (spanKind === 'embedding' && mlObsTags[INPUT_DOCUMENTS]) { + input.documents = mlObsTags[INPUT_DOCUMENTS] + } + if (mlObsTags[OUTPUT_VALUE]) { + output.value = mlObsTags[OUTPUT_VALUE] + } + if (spanKind === 'retrieval' && mlObsTags[OUTPUT_DOCUMENTS]) { + output.documents = mlObsTags[OUTPUT_DOCUMENTS] + } + + const error = spanTags.error || spanTags[ERROR_TYPE] + if (error) { + 
meta[ERROR_MESSAGE] = spanTags[ERROR_MESSAGE] || error.message || error.code + meta[ERROR_TYPE] = spanTags[ERROR_TYPE] || error.name + meta[ERROR_STACK] = spanTags[ERROR_STACK] || error.stack + } + + if (input) meta.input = input + if (output) meta.output = output + + const metrics = mlObsTags[METRICS] || {} + + const mlApp = mlObsTags[ML_APP] + const sessionId = mlObsTags[SESSION_ID] + const parentId = mlObsTags[PARENT_ID_KEY] + + const name = mlObsTags[NAME] || span._name + + const llmObsSpanEvent = { + trace_id: span.context().toTraceId(true), + span_id: span.context().toSpanId(), + parent_id: parentId, + name, + tags: this._processTags(span, mlApp, sessionId, error), + start_ns: Math.round(span._startTime * 1e6), + duration: Math.round(span._duration * 1e6), + status: error ? 'error' : 'ok', + meta, + metrics, + _dd: { + span_id: span.context().toSpanId(), + trace_id: span.context().toTraceId(true) + } + } + + if (sessionId) llmObsSpanEvent.session_id = sessionId + + return llmObsSpanEvent + } + + // For now, this only applies to metadata, as we let users annotate this field with any object + // However, we want to protect against circular references or BigInts (unserializable) + // This function can be reused for other fields if needed + // Messages, Documents, and Metrics are safeguarded in `llmobs/tagger.js` + _addObject (obj, carrier) { + const seenObjects = new WeakSet() + seenObjects.add(obj) // capture root object + + const isCircular = value => { + if (typeof value !== 'object') return false + if (seenObjects.has(value)) return true + seenObjects.add(value) + return false + } + + const add = (obj, carrier) => { + for (const key in obj) { + const value = obj[key] + if (!Object.prototype.hasOwnProperty.call(obj, key)) continue + if (typeof value === 'bigint' || isCircular(value)) { + // mark as unserializable instead of dropping + logger.warn(`Unserializable property found in metadata: ${key}`) + carrier[key] = UNSERIALIZABLE_VALUE_TEXT + continue + } + 
if (typeof value === 'object') { + add(value, carrier[key] = {}) + } else { + carrier[key] = value + } + } + } + + add(obj, carrier) + } + + _processTags (span, mlApp, sessionId, error) { + let tags = { + version: this._config.version, + env: this._config.env, + service: this._config.service, + source: 'integration', + ml_app: mlApp, + 'dd-trace.version': tracerVersion, + error: Number(!!error) || 0, + language: 'javascript' + } + const errType = span.context()._tags[ERROR_TYPE] || error?.name + if (errType) tags.error_type = errType + if (sessionId) tags.session_id = sessionId + const existingTags = LLMObsTagger.tagMap.get(span)?.[TAGS] || {} + if (existingTags) tags = { ...tags, ...existingTags } + return Object.entries(tags).map(([key, value]) => `${key}:${value ?? ''}`) + } +} + +module.exports = LLMObsSpanProcessor diff --git a/packages/dd-trace/src/llmobs/storage.js b/packages/dd-trace/src/llmobs/storage.js new file mode 100644 index 00000000000..1362aaf966e --- /dev/null +++ b/packages/dd-trace/src/llmobs/storage.js @@ -0,0 +1,7 @@ +'use strict' + +// TODO: remove this and use namespaced storage once available +const { AsyncLocalStorage } = require('async_hooks') +const storage = new AsyncLocalStorage() + +module.exports = { storage } diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js new file mode 100644 index 00000000000..9f1728e5d7b --- /dev/null +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -0,0 +1,322 @@ +'use strict' + +const log = require('../log') +const { + MODEL_NAME, + MODEL_PROVIDER, + SESSION_ID, + ML_APP, + SPAN_KIND, + INPUT_VALUE, + OUTPUT_DOCUMENTS, + INPUT_DOCUMENTS, + OUTPUT_VALUE, + METADATA, + METRICS, + PARENT_ID_KEY, + INPUT_MESSAGES, + OUTPUT_MESSAGES, + TAGS, + NAME, + PROPAGATED_PARENT_ID_KEY, + ROOT_PARENT_ID, + INPUT_TOKENS_METRIC_KEY, + OUTPUT_TOKENS_METRIC_KEY, + TOTAL_TOKENS_METRIC_KEY +} = require('./constants/tags') + +// global registry of LLMObs spans +// maps LLMObs spans to 
their annotations +const registry = new WeakMap() + +class LLMObsTagger { + constructor (config, softFail = false) { + this._config = config + + this.softFail = softFail + } + + static get tagMap () { + return registry + } + + registerLLMObsSpan (span, { + modelName, + modelProvider, + sessionId, + mlApp, + parent, + kind, + name + } = {}) { + if (!this._config.llmobs.enabled) return + if (!kind) return // do not register it in the map if it doesn't have an llmobs span kind + + this._register(span) + + if (name) this._setTag(span, NAME, name) + + this._setTag(span, SPAN_KIND, kind) + if (modelName) this._setTag(span, MODEL_NAME, modelName) + if (modelProvider) this._setTag(span, MODEL_PROVIDER, modelProvider) + + sessionId = sessionId || parent?.context()._tags[SESSION_ID] + if (sessionId) this._setTag(span, SESSION_ID, sessionId) + + if (!mlApp) mlApp = parent?.context()._tags[ML_APP] || this._config.llmobs.mlApp + this._setTag(span, ML_APP, mlApp) + + const parentId = + parent?.context().toSpanId() || + span.context()._trace.tags[PROPAGATED_PARENT_ID_KEY] || + ROOT_PARENT_ID + this._setTag(span, PARENT_ID_KEY, parentId) + } + + // TODO: similarly for the following `tag` methods, + // how can we transition from a span weakmap to core API functionality + tagLLMIO (span, inputData, outputData) { + this._tagMessages(span, inputData, INPUT_MESSAGES) + this._tagMessages(span, outputData, OUTPUT_MESSAGES) + } + + tagEmbeddingIO (span, inputData, outputData) { + this._tagDocuments(span, inputData, INPUT_DOCUMENTS) + this._tagText(span, outputData, OUTPUT_VALUE) + } + + tagRetrievalIO (span, inputData, outputData) { + this._tagText(span, inputData, INPUT_VALUE) + this._tagDocuments(span, outputData, OUTPUT_DOCUMENTS) + } + + tagTextIO (span, inputData, outputData) { + this._tagText(span, inputData, INPUT_VALUE) + this._tagText(span, outputData, OUTPUT_VALUE) + } + + tagMetadata (span, metadata) { + this._setTag(span, METADATA, metadata) + } + + tagMetrics (span, metrics) 
{ + const filterdMetrics = {} + for (const [key, value] of Object.entries(metrics)) { + let processedKey = key + + // processing these specifically for our metrics ingestion + switch (key) { + case 'inputTokens': + processedKey = INPUT_TOKENS_METRIC_KEY + break + case 'outputTokens': + processedKey = OUTPUT_TOKENS_METRIC_KEY + break + case 'totalTokens': + processedKey = TOTAL_TOKENS_METRIC_KEY + break + } + + if (typeof value === 'number') { + filterdMetrics[processedKey] = value + } else { + this._handleFailure(`Value for metric '${key}' must be a number, instead got ${value}`) + } + } + + this._setTag(span, METRICS, filterdMetrics) + } + + tagSpanTags (span, tags) { + // new tags will be merged with existing tags + const currentTags = registry.get(span)?.[TAGS] + if (currentTags) { + Object.assign(tags, currentTags) + } + this._setTag(span, TAGS, tags) + } + + _tagText (span, data, key) { + if (data) { + if (typeof data === 'string') { + this._setTag(span, key, data) + } else { + try { + this._setTag(span, key, JSON.stringify(data)) + } catch { + const type = key === INPUT_VALUE ? 
'input' : 'output' + this._handleFailure(`Failed to parse ${type} value, must be JSON serializable.`) + } + } + } + } + + _tagDocuments (span, data, key) { + if (data) { + if (!Array.isArray(data)) { + data = [data] + } + + const documents = data.map(document => { + if (typeof document === 'string') { + return { text: document } + } + + if (document == null || typeof document !== 'object') { + this._handleFailure('Documents must be a string, object, or list of objects.') + return undefined + } + + const { text, name, id, score } = document + let validDocument = true + + if (typeof text !== 'string') { + this._handleFailure('Document text must be a string.') + validDocument = false + } + + const documentObj = { text } + + validDocument = this._tagConditionalString(name, 'Document name', documentObj, 'name') && validDocument + validDocument = this._tagConditionalString(id, 'Document ID', documentObj, 'id') && validDocument + validDocument = this._tagConditionalNumber(score, 'Document score', documentObj, 'score') && validDocument + + return validDocument ? 
documentObj : undefined + }).filter(doc => !!doc) + + if (documents.length) { + this._setTag(span, key, documents) + } + } + } + + _tagMessages (span, data, key) { + if (data) { + if (!Array.isArray(data)) { + data = [data] + } + + const messages = data.map(message => { + if (typeof message === 'string') { + return { content: message } + } + + if (message == null || typeof message !== 'object') { + this._handleFailure('Messages must be a string, object, or list of objects') + return undefined + } + + let validMessage = true + + const { content = '', role } = message + let toolCalls = message.toolCalls + const messageObj = { content } + + if (typeof content !== 'string') { + this._handleFailure('Message content must be a string.') + validMessage = false + } + + validMessage = this._tagConditionalString(role, 'Message role', messageObj, 'role') && validMessage + + if (toolCalls) { + if (!Array.isArray(toolCalls)) { + toolCalls = [toolCalls] + } + + const filteredToolCalls = toolCalls.map(toolCall => { + if (typeof toolCall !== 'object') { + this._handleFailure('Tool call must be an object.') + return undefined + } + + let validTool = true + + const { name, arguments: args, toolId, type } = toolCall + const toolCallObj = {} + + validTool = this._tagConditionalString(name, 'Tool name', toolCallObj, 'name') && validTool + validTool = this._tagConditionalObject(args, 'Tool arguments', toolCallObj, 'arguments') && validTool + validTool = this._tagConditionalString(toolId, 'Tool ID', toolCallObj, 'tool_id') && validTool + validTool = this._tagConditionalString(type, 'Tool type', toolCallObj, 'type') && validTool + + return validTool ? toolCallObj : undefined + }).filter(toolCall => !!toolCall) + + if (filteredToolCalls.length) { + messageObj.tool_calls = filteredToolCalls + } + } + + return validMessage ? 
messageObj : undefined + }).filter(msg => !!msg) + + if (messages.length) { + this._setTag(span, key, messages) + } + } + } + + _tagConditionalString (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'string') { + this._handleFailure(`"${type}" must be a string.`) + return false + } + carrier[key] = data + return true + } + + _tagConditionalNumber (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'number') { + this._handleFailure(`"${type}" must be a number.`) + return false + } + carrier[key] = data + return true + } + + _tagConditionalObject (data, type, carrier, key) { + if (!data) return true + if (typeof data !== 'object') { + this._handleFailure(`"${type}" must be an object.`) + return false + } + carrier[key] = data + return true + } + + // any public-facing LLMObs APIs using this tagger should not soft fail + // auto-instrumentation should soft fail + _handleFailure (msg) { + if (this.softFail) { + log.warn(msg) + } else { + throw new Error(msg) + } + } + + _register (span) { + if (!this._config.llmobs.enabled) return + if (registry.has(span)) { + this._handleFailure(`LLMObs Span "${span._name}" already registered.`) + return + } + + registry.set(span, {}) + } + + _setTag (span, key, value) { + if (!this._config.llmobs.enabled) return + if (!registry.has(span)) { + this._handleFailure('Span must be an LLMObs generated span.') + return + } + + const tagsCarrier = registry.get(span) + Object.assign(tagsCarrier, { [key]: value }) + } +} + +module.exports = LLMObsTagger diff --git a/packages/dd-trace/src/llmobs/util.js b/packages/dd-trace/src/llmobs/util.js new file mode 100644 index 00000000000..feba656f952 --- /dev/null +++ b/packages/dd-trace/src/llmobs/util.js @@ -0,0 +1,176 @@ +'use strict' + +const { SPAN_KINDS } = require('./constants/tags') + +function encodeUnicode (str) { + if (!str) return str + return str.split('').map(char => { + const code = char.charCodeAt(0) + if (code > 127) { + return 
`\\u${code.toString(16).padStart(4, '0')}` + } + return char + }).join('') +} + +function validateKind (kind) { + if (!SPAN_KINDS.includes(kind)) { + throw new Error(` + Invalid span kind specified: "${kind}" + Must be one of: ${SPAN_KINDS.join(', ')} + `) + } + + return kind +} + +// extracts the argument names from a function string +function parseArgumentNames (str) { + const result = [] + let current = '' + let closerCount = 0 + let recording = true + let inSingleLineComment = false + let inMultiLineComment = false + + for (let i = 0; i < str.length; i++) { + const char = str[i] + const nextChar = str[i + 1] + + // Handle single-line comments + if (!inMultiLineComment && char === '/' && nextChar === '/') { + inSingleLineComment = true + i++ // Skip the next character + continue + } + + // Handle multi-line comments + if (!inSingleLineComment && char === '/' && nextChar === '*') { + inMultiLineComment = true + i++ // Skip the next character + continue + } + + // End of single-line comment + if (inSingleLineComment && char === '\n') { + inSingleLineComment = false + continue + } + + // End of multi-line comment + if (inMultiLineComment && char === '*' && nextChar === '/') { + inMultiLineComment = false + i++ // Skip the next character + continue + } + + // Skip characters inside comments + if (inSingleLineComment || inMultiLineComment) { + continue + } + + if (['{', '[', '('].includes(char)) { + closerCount++ + } else if (['}', ']', ')'].includes(char)) { + closerCount-- + } else if (char === '=' && nextChar !== '>' && closerCount === 0) { + recording = false + // record the variable name early, and stop counting characters until we reach the next comma + result.push(current.trim()) + current = '' + continue + } else if (char === ',' && closerCount === 0) { + if (recording) { + result.push(current.trim()) + current = '' + } + + recording = true + continue + } + + if (recording) { + current += char + } + } + + if (current && recording) { + 
result.push(current.trim()) + } + + return result +} + +// finds the bounds of the arguments in a function string +function findArgumentsBounds (str) { + let start = -1 + let end = -1 + let closerCount = 0 + + for (let i = 0; i < str.length; i++) { + const char = str[i] + + if (char === '(') { + if (closerCount === 0) { + start = i + } + + closerCount++ + } else if (char === ')') { + closerCount-- + + if (closerCount === 0) { + end = i + break + } + } + } + + return { start, end } +} + +const memo = new WeakMap() +function getFunctionArguments (fn, args = []) { + if (!fn) return + if (!args.length) return + if (args.length === 1) return args[0] + + try { + let names + if (memo.has(fn)) { + names = memo.get(fn) + } else { + const fnString = fn.toString() + const { start, end } = findArgumentsBounds(fnString) + names = parseArgumentNames(fnString.slice(start + 1, end)) + memo.set(fn, names) + } + + const argsObject = {} + + for (const argIdx in args) { + const name = names[argIdx] + const arg = args[argIdx] + + const spread = name?.startsWith('...') + + // this can only be the last argument + if (spread) { + argsObject[name.slice(3)] = args.slice(argIdx) + break + } + + argsObject[name] = arg + } + + return argsObject + } catch { + return args + } +} + +module.exports = { + encodeUnicode, + validateKind, + getFunctionArguments +} diff --git a/packages/dd-trace/src/llmobs/writers/base.js b/packages/dd-trace/src/llmobs/writers/base.js new file mode 100644 index 00000000000..8a6cdae9c2f --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/base.js @@ -0,0 +1,111 @@ +'use strict' + +const request = require('../../exporters/common/request') +const { URL, format } = require('url') + +const logger = require('../../log') + +const { encodeUnicode } = require('../util') +const log = require('../../log') + +class BaseLLMObsWriter { + constructor ({ interval, timeout, endpoint, intake, eventType, protocol, port }) { + this._interval = interval || 1000 // 1s + this._timeout = 
timeout || 5000 // 5s + this._eventType = eventType + + this._buffer = [] + this._bufferLimit = 1000 + this._bufferSize = 0 + + this._url = new URL(format({ + protocol: protocol || 'https:', + hostname: intake, + port: port || 443, + pathname: endpoint + })) + + this._headers = { + 'Content-Type': 'application/json' + } + + this._periodic = setInterval(() => { + this.flush() + }, this._interval).unref() + + // keep a handle so destroy() can remove this exact listener + this._beforeExitHandler = () => this.destroy() + process.once('beforeExit', this._beforeExitHandler) + + this._destroyed = false + + logger.debug(`Started ${this.constructor.name} to ${this._url}`) + } + + append (event, byteLength) { + if (this._buffer.length >= this._bufferLimit) { + logger.warn(`${this.constructor.name} event buffer full (limit is ${this._bufferLimit}), dropping event`) + return + } + + this._bufferSize += byteLength || Buffer.from(JSON.stringify(event)).byteLength + this._buffer.push(event) + } + + flush () { + if (this._buffer.length === 0) { + return + } + + const events = this._buffer + this._buffer = [] + this._bufferSize = 0 + const payload = this._encode(this.makePayload(events)) + + const options = { + headers: this._headers, + method: 'POST', + url: this._url, + timeout: this._timeout + } + + log.debug(`Encoded LLMObs payload: ${payload}`) + + request(payload, options, (err, resp, code) => { + if (err) { + logger.error( + `Error sending ${events.length} LLMObs ${this._eventType} events to ${this._url}: ${err.message}` + ) + } else if (code >= 300) { + logger.error( + `Error sending ${events.length} LLMObs ${this._eventType} events to ${this._url}: ${code}` + ) + } else { + logger.debug(`Sent ${events.length} LLMObs ${this._eventType} events to ${this._url}`) + } + }) + } + + makePayload (events) {} + + destroy () { + if (!this._destroyed) { + logger.debug(`Stopping ${this.constructor.name}`) + clearInterval(this._periodic) + process.removeListener('beforeExit', this._beforeExitHandler) + this.flush() + this._destroyed = true + } + } + + _encode (payload) { + return
JSON.stringify(payload, (key, value) => { + if (typeof value === 'string') { + return encodeUnicode(value) // serialize unicode characters + } + return value + }).replace(/\\\\u/g, '\\u') // remove double escaping + } +} + +module.exports = BaseLLMObsWriter diff --git a/packages/dd-trace/src/llmobs/writers/evaluations.js b/packages/dd-trace/src/llmobs/writers/evaluations.js new file mode 100644 index 00000000000..d737f68c82c --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/evaluations.js @@ -0,0 +1,29 @@ +'use strict' + +const { AGENTLESS_EVALULATIONS_ENDPOINT } = require('../constants/writers') +const BaseWriter = require('./base') + +class LLMObsEvalMetricsWriter extends BaseWriter { + constructor (config) { + super({ + endpoint: AGENTLESS_EVALULATIONS_ENDPOINT, + intake: `api.${config.site}`, + eventType: 'evaluation_metric' + }) + + this._headers['DD-API-KEY'] = config.apiKey + } + + makePayload (events) { + return { + data: { + type: this._eventType, + attributes: { + metrics: events + } + } + } + } +} + +module.exports = LLMObsEvalMetricsWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js b/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js new file mode 100644 index 00000000000..6274f6117e0 --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/agentProxy.js @@ -0,0 +1,23 @@ +'use strict' + +const { + EVP_SUBDOMAIN_HEADER_NAME, + EVP_SUBDOMAIN_HEADER_VALUE, + EVP_PROXY_AGENT_ENDPOINT +} = require('../../constants/writers') +const LLMObsBaseSpanWriter = require('./base') + +class LLMObsAgentProxySpanWriter extends LLMObsBaseSpanWriter { + constructor (config) { + super({ + intake: config.hostname || 'localhost', + protocol: 'http:', + endpoint: EVP_PROXY_AGENT_ENDPOINT, + port: config.port + }) + + this._headers[EVP_SUBDOMAIN_HEADER_NAME] = EVP_SUBDOMAIN_HEADER_VALUE + } +} + +module.exports = LLMObsAgentProxySpanWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/agentless.js 
b/packages/dd-trace/src/llmobs/writers/spans/agentless.js new file mode 100644 index 00000000000..452f41d541a --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/agentless.js @@ -0,0 +1,17 @@ +'use strict' + +const { AGENTLESS_SPANS_ENDPOINT } = require('../../constants/writers') +const LLMObsBaseSpanWriter = require('./base') + +class LLMObsAgentlessSpanWriter extends LLMObsBaseSpanWriter { + constructor (config) { + super({ + intake: `llmobs-intake.${config.site}`, + endpoint: AGENTLESS_SPANS_ENDPOINT + }) + + this._headers['DD-API-KEY'] = config.apiKey + } +} + +module.exports = LLMObsAgentlessSpanWriter diff --git a/packages/dd-trace/src/llmobs/writers/spans/base.js b/packages/dd-trace/src/llmobs/writers/spans/base.js new file mode 100644 index 00000000000..f5fe3443f2d --- /dev/null +++ b/packages/dd-trace/src/llmobs/writers/spans/base.js @@ -0,0 +1,49 @@ +'use strict' + +const { EVP_EVENT_SIZE_LIMIT, EVP_PAYLOAD_SIZE_LIMIT } = require('../../constants/writers') +const { DROPPED_VALUE_TEXT } = require('../../constants/text') +const { DROPPED_IO_COLLECTION_ERROR } = require('../../constants/tags') +const BaseWriter = require('../base') +const logger = require('../../../log') + +class LLMObsSpanWriter extends BaseWriter { + constructor (options) { + super({ + ...options, + eventType: 'span' + }) + } + + append (event) { + const eventSizeBytes = Buffer.from(JSON.stringify(event)).byteLength + if (eventSizeBytes > EVP_EVENT_SIZE_LIMIT) { + logger.warn(`Dropping event input/output because its size (${eventSizeBytes}) exceeds the 1MB event size limit`) + event = this._truncateSpanEvent(event) + } + + if (this._bufferSize + eventSizeBytes > EVP_PAYLOAD_SIZE_LIMIT) { + logger.debug('Flusing queue because queing next event will exceed EvP payload limit') + this.flush() + } + + super.append(event, eventSizeBytes) + } + + makePayload (events) { + return { + '_dd.stage': 'raw', + event_type: this._eventType, + spans: events + } + } + + _truncateSpanEvent (event) 
{ + event.meta.input = { value: DROPPED_VALUE_TEXT } + event.meta.output = { value: DROPPED_VALUE_TEXT } + + event.collection_errors = [DROPPED_IO_COLLECTION_ERROR] + return event + } +} + +module.exports = LLMObsSpanWriter diff --git a/packages/dd-trace/src/noop/proxy.js b/packages/dd-trace/src/noop/proxy.js index 417cb846f8d..ec8671a371e 100644 --- a/packages/dd-trace/src/noop/proxy.js +++ b/packages/dd-trace/src/noop/proxy.js @@ -3,16 +3,19 @@ const NoopTracer = require('./tracer') const NoopAppsecSdk = require('../appsec/sdk/noop') const NoopDogStatsDClient = require('./dogstatsd') +const NoopLLMObsSDK = require('../llmobs/noop') const noop = new NoopTracer() const noopAppsec = new NoopAppsecSdk() const noopDogStatsDClient = new NoopDogStatsDClient() +const noopLLMObs = new NoopLLMObsSDK(noop) class Tracer { constructor () { this._tracer = noop this.appsec = noopAppsec this.dogstatsd = noopDogStatsDClient + this.llmobs = noopLLMObs } init () { diff --git a/packages/dd-trace/src/noop/span.js b/packages/dd-trace/src/noop/span.js index bee3ce11702..0bdbf96ef66 100644 --- a/packages/dd-trace/src/noop/span.js +++ b/packages/dd-trace/src/noop/span.js @@ -16,6 +16,9 @@ class NoopSpan { setOperationName (name) { return this } setBaggageItem (key, value) { return this } getBaggageItem (key) {} + getAllBaggageItems () {} + removeBaggageItem (key) { return this } + removeAllBaggageItems () { return this } setTag (key, value) { return this } addTags (keyValueMap) { return this } addLink (link) { return this } diff --git a/packages/dd-trace/src/opentracing/propagation/text_map.js b/packages/dd-trace/src/opentracing/propagation/text_map.js index 1346f85de72..4c67cfa5957 100644 --- a/packages/dd-trace/src/opentracing/propagation/text_map.js +++ b/packages/dd-trace/src/opentracing/propagation/text_map.js @@ -53,6 +53,8 @@ class TextMapPropagator { } inject (spanContext, carrier) { + if (!spanContext || !carrier) return + this._injectBaggageItems(spanContext, carrier) 
this._injectDatadog(spanContext, carrier) this._injectB3MultipleHeaders(spanContext, carrier) @@ -107,10 +109,35 @@ class TextMapPropagator { } } + _encodeOtelBaggageKey (key) { + let encoded = encodeURIComponent(key) + encoded = encoded.replaceAll('(', '%28') + encoded = encoded.replaceAll(')', '%29') + return encoded + } + _injectBaggageItems (spanContext, carrier) { - spanContext._baggageItems && Object.keys(spanContext._baggageItems).forEach(key => { - carrier[baggagePrefix + key] = String(spanContext._baggageItems[key]) - }) + if (this._config.legacyBaggageEnabled) { + spanContext._baggageItems && Object.keys(spanContext._baggageItems).forEach(key => { + carrier[baggagePrefix + key] = String(spanContext._baggageItems[key]) + }) + } + if (this._hasPropagationStyle('inject', 'baggage')) { + let baggage = '' + let itemCounter = 0 + let byteCounter = 0 + + for (const [key, value] of Object.entries(spanContext._baggageItems)) { + const item = `${this._encodeOtelBaggageKey(String(key).trim())}=${encodeURIComponent(String(value).trim())},` + itemCounter += 1 + byteCounter += item.length + if (itemCounter > this._config.baggageMaxItems || byteCounter > this._config.baggageMaxBytes) break + baggage += item + } + + baggage = baggage.slice(0, baggage.length - 1) + if (baggage) carrier.baggage = baggage + } } _injectTags (spanContext, carrier) { @@ -299,6 +326,11 @@ class TextMapPropagator { default: log.warn(`Unknown propagation style: ${extractor}`) } + + if (this._config.tracePropagationStyle.extract.includes('baggage') && carrier.baggage) { + spanContext = spanContext || new DatadogSpanContext() + this._extractBaggageItems(carrier, spanContext) + } } return spanContext || this._extractSqsdContext(carrier) @@ -310,7 +342,7 @@ class TextMapPropagator { if (!spanContext) return spanContext this._extractOrigin(carrier, spanContext) - this._extractBaggageItems(carrier, spanContext) + this._extractLegacyBaggageItems(carrier, spanContext) 
this._extractSamplingPriority(carrier, spanContext) this._extractTags(carrier, spanContext) @@ -383,7 +415,7 @@ class TextMapPropagator { return null } const matches = headerValue.trim().match(traceparentExpr) - if (matches.length) { + if (matches?.length) { const [version, traceId, spanId, flags, tail] = matches.slice(1) const traceparent = { version } const tracestate = TraceState.fromString(carrier.tracestate) @@ -444,7 +476,7 @@ class TextMapPropagator { } }) - this._extractBaggageItems(carrier, spanContext) + this._extractLegacyBaggageItems(carrier, spanContext) return spanContext } return null @@ -528,14 +560,43 @@ class TextMapPropagator { } } - _extractBaggageItems (carrier, spanContext) { - Object.keys(carrier).forEach(key => { - const match = key.match(baggageExpr) + _decodeOtelBaggageKey (key) { + let decoded = decodeURIComponent(key) + decoded = decoded.replaceAll('%28', '(') + decoded = decoded.replaceAll('%29', ')') + return decoded + } - if (match) { - spanContext._baggageItems[match[1]] = carrier[key] + _extractLegacyBaggageItems (carrier, spanContext) { + if (this._config.legacyBaggageEnabled) { + Object.keys(carrier).forEach(key => { + const match = key.match(baggageExpr) + + if (match) { + spanContext._baggageItems[match[1]] = carrier[key] + } + }) + } + } + + _extractBaggageItems (carrier, spanContext) { + const baggages = carrier.baggage.split(',') + for (const keyValue of baggages) { + if (!keyValue.includes('=')) { + spanContext._baggageItems = {} + return } - }) + let [key, value] = keyValue.split('=') + key = this._decodeOtelBaggageKey(key.trim()) + value = decodeURIComponent(value.trim()) + if (!key || !value) { + spanContext._baggageItems = {} + return + } + // the current code assumes precedence of ot-baggage- (legacy opentracing baggage) over baggage + if (key in spanContext._baggageItems) return + spanContext._baggageItems[key] = value + } } _extractSamplingPriority (carrier, spanContext) { diff --git 
a/packages/dd-trace/src/opentracing/span.js b/packages/dd-trace/src/opentracing/span.js index 723597ff043..5a50166aa49 100644 --- a/packages/dd-trace/src/opentracing/span.js +++ b/packages/dd-trace/src/opentracing/span.js @@ -145,6 +145,18 @@ class DatadogSpan { return this._spanContext._baggageItems[key] } + getAllBaggageItems () { + return JSON.stringify(this._spanContext._baggageItems) + } + + removeBaggageItem (key) { + delete this._spanContext._baggageItems[key] + } + + removeAllBaggageItems () { + this._spanContext._baggageItems = {} + } + setTag (key, value) { this._addTags({ [key]: value }) return this diff --git a/packages/dd-trace/src/payload-tagging/config/aws.json b/packages/dd-trace/src/payload-tagging/config/aws.json index 400b25bf670..0a63a9ab388 100644 --- a/packages/dd-trace/src/payload-tagging/config/aws.json +++ b/packages/dd-trace/src/payload-tagging/config/aws.json @@ -17,14 +17,82 @@ "$.Attributes.Token", "$.Endpoints.*.Token", "$.PhoneNumber", - "$.PhoneNumbers", - "$.phoneNumbers", "$.PlatformApplication.*.PlatformCredential", "$.PlatformApplication.*.PlatformPrincipal", - "$.Subscriptions.*.Endpoint" + "$.Subscriptions.*.Endpoint", + "$.PhoneNumbers[*].PhoneNumber", + "$.phoneNumbers[*]" ], "expand": [ "$.MessageAttributes.*.StringValue" ] + }, + "eventbridge": { + "request": [ + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.HeaderParameters[*].Value", + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.QueryStringParameters[*].Value", + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.BodyParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.HeaderParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.QueryStringParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.BodyParameters[*].Value", + "$.Targets[*].RedshiftDataParameters.Sql", + "$.Targets[*].RedshiftDataParameters.Sqls", + "$.Targets[*].AppSyncParameters.GraphQLOperation", + "$.AuthParameters.BasicAuthParameters.Password", + 
"$.AuthParameters.OAuthParameters.ClientParameters.ClientSecret", + "$.AuthParameters.ApiKeyAuthParameters.ApiKeyValue" + ], + "response": [ + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.HeaderParameters[*].Value", + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.QueryStringParameters[*].Value", + "$.AuthParameters.OAuthParameters.OAuthHttpParameters.BodyParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.HeaderParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.QueryStringParameters[*].Value", + "$.AuthParameters.InvocationHttpParameters.BodyParameters[*].Value", + "$.Targets[*].RedshiftDataParameters.Sql", + "$.Targets[*].RedshiftDataParameters.Sqls", + "$.Targets[*].AppSyncParameters.GraphQLOperation" + ], + "expand": [ + ] + }, + "s3": { + "request": [ + "$.SSEKMSKeyId", + "$.SSEKMSEncryptionContext", + "$.ServerSideEncryptionConfiguration.Rules[*].ApplyServerSideEncryptionByDefault.KMSMasterKeyID", + "$.InventoryConfiguration.Destination.S3BucketDestination.Encryption.SSEKMS.KeyId", + "$.SSECustomerKey", + "$.CopySourceSSECustomerKey", + "$.RestoreRequest.OutputLocation.S3.Encryption.KMSKeyId" + + ], + "response": [ + "$.SSEKMSKeyId", + "$.SSEKMSEncryptionContext", + "$.ServerSideEncryptionConfiguration.Rules[*].ApplyServerSideEncryptionByDefault.KMSMasterKeyID", + "$.InventoryConfiguration.Destination.S3BucketDestination.Encryption.SSEKMS.KeyId", + "$.Credentials.SecretAccessKey", + "$.Credentials.SessionToken", + "$.InventoryConfigurationList[*].Destination.S3BucketDestination.Encryption.SSEKMS.KeyId" + ], + "expand": [ + ] + }, + "sqs": { + "request": [ + ], + "response": [ + ], + "expand": [ + ] + }, + "kinesis": { + "request": [ + ], + "response": [ + ], + "expand": [ + ] } } diff --git a/packages/dd-trace/src/plugins/outbound.js b/packages/dd-trace/src/plugins/outbound.js index f0a9509269e..44dbfa35aaa 100644 --- a/packages/dd-trace/src/plugins/outbound.js +++ b/packages/dd-trace/src/plugins/outbound.js @@ 
-7,6 +7,7 @@ const { PEER_SERVICE_REMAP_KEY } = require('../constants') const TracingPlugin = require('./tracing') +const { exitTags } = require('../../../datadog-code-origin') const COMMON_PEER_SVC_SOURCE_TAGS = [ 'net.peer.name', @@ -25,6 +26,14 @@ class OutboundPlugin extends TracingPlugin { }) } + startSpan (...args) { + const span = super.startSpan(...args) + if (this._tracerConfig.codeOriginForSpans.enabled) { + span.addTags(exitTags(this.startSpan)) + } + return span + } + getPeerService (tags) { /** * Compute `peer.service` and associated metadata from available tags, based diff --git a/packages/dd-trace/src/plugins/util/inferred_proxy.js b/packages/dd-trace/src/plugins/util/inferred_proxy.js new file mode 100644 index 00000000000..54fe2cb761b --- /dev/null +++ b/packages/dd-trace/src/plugins/util/inferred_proxy.js @@ -0,0 +1,121 @@ +const log = require('../../log') +const tags = require('../../../../../ext/tags') + +const RESOURCE_NAME = tags.RESOURCE_NAME +const HTTP_ROUTE = tags.HTTP_ROUTE +const SPAN_KIND = tags.SPAN_KIND +const SPAN_TYPE = tags.SPAN_TYPE +const HTTP_URL = tags.HTTP_URL +const HTTP_METHOD = tags.HTTP_METHOD + +const PROXY_HEADER_SYSTEM = 'x-dd-proxy' +const PROXY_HEADER_START_TIME_MS = 'x-dd-proxy-request-time-ms' +const PROXY_HEADER_PATH = 'x-dd-proxy-path' +const PROXY_HEADER_HTTPMETHOD = 'x-dd-proxy-httpmethod' +const PROXY_HEADER_DOMAIN = 'x-dd-proxy-domain-name' +const PROXY_HEADER_STAGE = 'x-dd-proxy-stage' + +const supportedProxies = { + 'aws-apigateway': { + spanName: 'aws.apigateway', + component: 'aws-apigateway' + } +} + +function createInferredProxySpan (headers, childOf, tracer, context) { + if (!headers) { + return null + } + + if (!tracer._config?.inferredProxyServicesEnabled) { + return null + } + + const proxyContext = extractInferredProxyContext(headers) + + if (!proxyContext) { + return null + } + + const proxySpanInfo = supportedProxies[proxyContext.proxySystemName] + + log.debug(`Successfully extracted inferred span 
info ${proxyContext} for proxy: ${proxyContext.proxySystemName}`) + + const span = tracer.startSpan( + proxySpanInfo.spanName, + { + childOf, + type: 'web', + startTime: proxyContext.requestTime, + tags: { + service: proxyContext.domainName || tracer._config.service, + component: proxySpanInfo.component, + [SPAN_KIND]: 'internal', + [SPAN_TYPE]: 'web', + [HTTP_METHOD]: proxyContext.method, + [HTTP_URL]: proxyContext.domainName + proxyContext.path, + [HTTP_ROUTE]: proxyContext.path, + stage: proxyContext.stage + } + } + ) + + tracer.scope().activate(span) + context.inferredProxySpan = span + childOf = span + + log.debug('Successfully created inferred proxy span.') + + setInferredProxySpanTags(span, proxyContext) + + return childOf +} + +function setInferredProxySpanTags (span, proxyContext) { + span.setTag(RESOURCE_NAME, `${proxyContext.method} ${proxyContext.path}`) + span.setTag('_dd.inferred_span', '1') + return span +} + +function extractInferredProxyContext (headers) { + if (!(PROXY_HEADER_START_TIME_MS in headers)) { + return null + } + + if (!(PROXY_HEADER_SYSTEM in headers && headers[PROXY_HEADER_SYSTEM] in supportedProxies)) { + log.debug(`Received headers to create inferred proxy span but headers include an unsupported proxy type ${headers}`) + return null + } + + return { + requestTime: headers[PROXY_HEADER_START_TIME_MS] + ? parseInt(headers[PROXY_HEADER_START_TIME_MS], 10) + : null, + method: headers[PROXY_HEADER_HTTPMETHOD], + path: headers[PROXY_HEADER_PATH], + stage: headers[PROXY_HEADER_STAGE], + domainName: headers[PROXY_HEADER_DOMAIN], + proxySystemName: headers[PROXY_HEADER_SYSTEM] + } +} + +function finishInferredProxySpan (context) { + const { req } = context + + if (!context.inferredProxySpan) return + + if (context.inferredProxySpanFinished && !req.stream) return + + // context.config.hooks.request(context.inferredProxySpan, req, res) # TODO: Do we need this?? 
+ + // Only close the inferred span if one was created + if (context.inferredProxySpan) { + context.inferredProxySpan.finish() + context.inferredProxySpanFinished = true + } +} + +module.exports = { + createInferredProxySpan, + finishInferredProxySpan +} diff --git a/packages/dd-trace/src/plugins/util/web.js b/packages/dd-trace/src/plugins/util/web.js index 832044b29f8..374490c3bf0 100644 --- a/packages/dd-trace/src/plugins/util/web.js +++ b/packages/dd-trace/src/plugins/util/web.js @@ -10,6 +10,7 @@ const kinds = require('../../../../../ext/kinds') const urlFilter = require('./urlfilter') const { extractIp } = require('./ip_extractor') const { ERROR_MESSAGE, ERROR_TYPE, ERROR_STACK } = require('../../constants') +const { createInferredProxySpan, finishInferredProxySpan } = require('./inferred_proxy') const WEB = types.WEB const SERVER = kinds.SERVER @@ -97,7 +98,7 @@ const web = { context.span.context()._name = name span = context.span } else { - span = web.startChildSpan(tracer, name, req.headers) + span = web.startChildSpan(tracer, name, req) } context.tracer = tracer @@ -253,8 +254,19 @@ const web = { }, // Extract the parent span from the headers and start a new span as its child - startChildSpan (tracer, name, headers) { - const childOf = tracer.extract(FORMAT_HTTP_HEADERS, headers) + startChildSpan (tracer, name, req) { + const headers = req.headers + const context = contexts.get(req) + let childOf = tracer.extract(FORMAT_HTTP_HEADERS, headers) + + // we may have headers signaling a router proxy span should be created (such as for AWS API Gateway) + if (tracer._config?.inferredProxyServicesEnabled) { + const proxySpan = createInferredProxySpan(headers, childOf, tracer, context) + if (proxySpan) { + childOf = proxySpan + } + } + const span = tracer.startSpan(name, { childOf }) return span @@ -263,13 +275,21 @@ const web = { // Validate a request's status code and then add error tags if necessary addStatusError (req, statusCode) { const context = 
contexts.get(req) - const span = context.span - const error = context.error - const hasExistingError = span.context()._tags.error || span.context()._tags[ERROR_MESSAGE] + const { span, inferredProxySpan, error } = context + + const spanHasExistingError = span.context()._tags.error || span.context()._tags[ERROR_MESSAGE] + const inferredSpanContext = inferredProxySpan?.context() + const inferredSpanHasExistingError = inferredSpanContext?._tags.error || inferredSpanContext?._tags[ERROR_MESSAGE] - if (!hasExistingError && !context.config.validateStatus(statusCode)) { + const isValidStatusCode = context.config.validateStatus(statusCode) + + if (!spanHasExistingError && !isValidStatusCode) { span.setTag(ERROR, error || true) } + + if (inferredProxySpan && !inferredSpanHasExistingError && !isValidStatusCode) { + inferredProxySpan.setTag(ERROR, error || true) + } }, // Add an error to the request @@ -316,6 +336,8 @@ const web = { web.finishMiddleware(context) web.finishSpan(context) + + finishInferredProxySpan(context) }, obfuscateQs (config, url) { @@ -426,7 +448,7 @@ function reactivate (req, fn) { } function addRequestTags (context, spanType) { - const { req, span, config } = context + const { req, span, inferredProxySpan, config } = context const url = extractURL(req) span.addTags({ @@ -443,6 +465,7 @@ function addRequestTags (context, spanType) { if (clientIp) { span.setTag(HTTP_CLIENT_IP, clientIp) + inferredProxySpan?.setTag(HTTP_CLIENT_IP, clientIp) } } @@ -450,7 +473,7 @@ function addRequestTags (context, spanType) { } function addResponseTags (context) { - const { req, res, paths, span } = context + const { req, res, paths, span, inferredProxySpan } = context if (paths.length > 0) { span.setTag(HTTP_ROUTE, paths.join('')) @@ -459,6 +482,9 @@ function addResponseTags (context) { span.addTags({ [HTTP_STATUS_CODE]: res.statusCode }) + inferredProxySpan?.addTags({ + [HTTP_STATUS_CODE]: res.statusCode + }) web.addStatusError(req, res.statusCode) } @@ -477,7 +503,7 @@ 
function addResourceTag (context) { } function addHeaders (context) { - const { req, res, config, span } = context + const { req, res, config, span, inferredProxySpan } = context config.headers.forEach(([key, tag]) => { const reqHeader = req.headers[key] @@ -485,10 +511,12 @@ function addHeaders (context) { if (reqHeader) { span.setTag(tag || `${HTTP_REQUEST_HEADERS}.${key}`, reqHeader) + inferredProxySpan?.setTag(tag || `${HTTP_REQUEST_HEADERS}.${key}`, reqHeader) } if (resHeader) { span.setTag(tag || `${HTTP_RESPONSE_HEADERS}.${key}`, resHeader) + inferredProxySpan?.setTag(tag || `${HTTP_RESPONSE_HEADERS}.${key}`, resHeader) } }) } diff --git a/packages/dd-trace/src/priority_sampler.js b/packages/dd-trace/src/priority_sampler.js index aae366c2622..f9968a41194 100644 --- a/packages/dd-trace/src/priority_sampler.js +++ b/packages/dd-trace/src/priority_sampler.js @@ -108,6 +108,18 @@ class PrioritySampler { } } + setPriority (span, samplingPriority, mechanism = SAMPLING_MECHANISM_MANUAL) { + if (!span || !this.validate(samplingPriority)) return + + const context = this._getContext(span) + + context._sampling.priority = samplingPriority + context._sampling.mechanism = mechanism + + const root = context._trace.started[0] + this._addDecisionMaker(root) + } + _getContext (span) { return typeof span.context === 'function' ? 
span.context() : span } @@ -201,6 +213,10 @@ class PrioritySampler { if (rule.match(span)) return rule } } + + static keepTrace (span, mechanism) { + span?._prioritySampler?.setPriority(span, USER_KEEP, mechanism) + } } module.exports = PrioritySampler diff --git a/packages/dd-trace/src/profiling/config.js b/packages/dd-trace/src/profiling/config.js index 538400aaa7a..3c360d65f7a 100644 --- a/packages/dd-trace/src/profiling/config.js +++ b/packages/dd-trace/src/profiling/config.js @@ -14,6 +14,7 @@ const { oomExportStrategies, snapshotKinds } = require('./constants') const { GIT_REPOSITORY_URL, GIT_COMMIT_SHA } = require('../plugins/util/tags') const { tagger } = require('./tagger') const { isFalse, isTrue } = require('../util') +const { getAzureTagsFromMetadata, getAzureAppMetadata } = require('../azure_metadata') class Config { constructor (options = {}) { @@ -71,7 +72,8 @@ class Config { this.tags = Object.assign( tagger.parse(DD_TAGS), tagger.parse(options.tags), - tagger.parse({ env, host, service, version, functionname }) + tagger.parse({ env, host, service, version, functionname }), + getAzureTagsFromMetadata(getAzureAppMetadata()) ) // Add source code integration tags if available diff --git a/packages/dd-trace/src/profiling/exporters/agent.js b/packages/dd-trace/src/profiling/exporters/agent.js index b34ab3c9d94..01363d6d2c5 100644 --- a/packages/dd-trace/src/profiling/exporters/agent.js +++ b/packages/dd-trace/src/profiling/exporters/agent.js @@ -195,11 +195,13 @@ class AgentExporter { }) sendRequest(options, form, (err, response) => { - if (operation.retry(err)) { - this._logger.error(`Error from the agent: ${err.message}`) - return - } else if (err) { - reject(err) + if (err) { + const { status } = err + if ((typeof status !== 'number' || status >= 500 || status === 429) && operation.retry(err)) { + this._logger.error(`Error from the agent: ${err.message}`) + } else { + reject(err) + } return } diff --git 
a/packages/dd-trace/src/profiling/profilers/wall.js b/packages/dd-trace/src/profiling/profilers/wall.js index 3d7041cfecf..dc3c0ba61ba 100644 --- a/packages/dd-trace/src/profiling/profilers/wall.js +++ b/packages/dd-trace/src/profiling/profilers/wall.js @@ -301,7 +301,8 @@ class NativeWallProfiler { const labels = { ...getThreadLabels() } - const { context: { ref: { spanId, rootSpanId, webTags, endpoint } }, timestamp } = context + const { context: { ref }, timestamp } = context + const { spanId, rootSpanId, webTags, endpoint } = ref ?? {} if (this._timelineEnabled) { // Incoming timestamps are in microseconds, we emit nanos. diff --git a/packages/dd-trace/src/proxy.js b/packages/dd-trace/src/proxy.js index b8916b205d4..32a7dcee10a 100644 --- a/packages/dd-trace/src/proxy.js +++ b/packages/dd-trace/src/proxy.js @@ -16,6 +16,7 @@ const NoopDogStatsDClient = require('./noop/dogstatsd') const spanleak = require('./spanleak') const { SSIHeuristics } = require('./profiling/ssi-heuristics') const appsecStandalone = require('./appsec/standalone') +const LLMObsSDK = require('./llmobs/sdk') class LazyModule { constructor (provider) { @@ -46,7 +47,8 @@ class Tracer extends NoopProxy { // these requires must work with esm bundler this._modules = { appsec: new LazyModule(() => require('./appsec')), - iast: new LazyModule(() => require('./appsec/iast')) + iast: new LazyModule(() => require('./appsec/iast')), + llmobs: new LazyModule(() => require('./llmobs')) } } @@ -195,11 +197,15 @@ class Tracer extends NoopProxy { if (config.appsec.enabled) { this._modules.appsec.enable(config) } + if (config.llmobs.enabled) { + this._modules.llmobs.enable(config) + } if (!this._tracingInitialized) { const prioritySampler = appsecStandalone.configure(config) this._tracer = new DatadogTracer(config, prioritySampler) this.dataStreamsCheckpointer = this._tracer.dataStreamsCheckpointer this.appsec = new AppsecSdk(this._tracer, config) + this.llmobs = new LLMObsSDK(this._tracer, 
this._modules.llmobs, config) this._tracingInitialized = true } if (config.iast.enabled) { @@ -208,6 +214,7 @@ class Tracer extends NoopProxy { } else if (this._tracingInitialized) { this._modules.appsec.disable() this._modules.iast.disable() + this._modules.llmobs.disable() } if (this._tracingInitialized) { diff --git a/packages/dd-trace/src/span_processor.js b/packages/dd-trace/src/span_processor.js index 6dc19407d56..deb92c02f34 100644 --- a/packages/dd-trace/src/span_processor.js +++ b/packages/dd-trace/src/span_processor.js @@ -10,6 +10,9 @@ const { SpanStatsProcessor } = require('./span_stats') const startedSpans = new WeakSet() const finishedSpans = new WeakSet() +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') + class SpanProcessor { constructor (exporter, prioritySampler, config) { this._exporter = exporter @@ -45,6 +48,8 @@ class SpanProcessor { const formattedSpan = format(span) this._stats.onSpanFinished(formattedSpan) formatted.push(formattedSpan) + + spanProcessCh.publish({ span }) } else { active.push(span) } diff --git a/packages/dd-trace/src/telemetry/metrics.js b/packages/dd-trace/src/telemetry/metrics.js index 34740aa7f2d..2c42bc23825 100644 --- a/packages/dd-trace/src/telemetry/metrics.js +++ b/packages/dd-trace/src/telemetry/metrics.js @@ -27,13 +27,18 @@ function hasPoints (metric) { return metric.points.length > 0 } +let versionTag + class Metric { constructor (namespace, metric, common, tags) { this.namespace = namespace.toString() this.metric = common ? 
metric : `nodejs.${metric}` this.tags = tagArray(tags) if (common) { - this.tags.push(`version:${process.version}`) + if (versionTag === undefined) { + versionTag = `version:${process.version}` + } + this.tags.push(versionTag) } this.common = common diff --git a/packages/dd-trace/test/appsec/iast/analyzers/header-injection.express.plugin.spec.js b/packages/dd-trace/test/appsec/iast/analyzers/header-injection.express.plugin.spec.js index bdb9734377a..7af02e47637 100644 --- a/packages/dd-trace/test/appsec/iast/analyzers/header-injection.express.plugin.spec.js +++ b/packages/dd-trace/test/appsec/iast/analyzers/header-injection.express.plugin.spec.js @@ -106,170 +106,314 @@ describe('Header injection vulnerability', () => { }, vulnerability: 'HEADER_INJECTION' }) + } + ) - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "location"', - fn: (req, res) => { - setHeaderFunction('location', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }).catch(done) - } - }) + describe('Header Injection exclusions', () => { + let i = 0 + let setHeaderFunctionsPath - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Sec-WebSocket-Location"', - fn: (req, res) => { - setHeaderFunction('Sec-WebSocket-Location', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }).catch(done) - } - }) + before(() => { + setHeaderFunctionsPath = path.join(os.tmpdir(), `set-header-function-${i++}.js`) + fs.copyFileSync( + path.join(__dirname, 'resources', 'set-header-function.js'), + setHeaderFunctionsPath + ) + }) - testThatRequestHasNoVulnerability({ - testDescription: 'should not 
have HEADER_INJECTION vulnerability when the header is "Sec-WebSocket-Accept"', - fn: (req, res) => { - setHeaderFunction('Sec-WebSocket-Accept', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }).catch(done) - } - }) + after(() => { + fs.unlinkSync(setHeaderFunctionsPath) + }) - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Upgrade"', - fn: (req, res) => { - setHeaderFunction('Upgrade', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }).catch(done) - } - }) + prepareTestServerForIastInExpress('in express', version, + (testThatRequestHasVulnerability, testThatRequestHasNoVulnerability) => { + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "location"', + fn: (req, res) => { + setHeaderFunction('location', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }).catch(done) + } + }) - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Connection"', - fn: (req, res) => { - setHeaderFunction('Upgrade', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }).catch(done) - } - }) + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + 'when the header is "Sec-WebSocket-Location"', + fn: (req, res) => { + 
setHeaderFunction('Sec-WebSocket-Location', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }).catch(done) + } + }) - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability ' + - 'when the header is "access-control-allow-origin" and the origin is a header', - fn: (req, res) => { - setHeaderFunction('access-control-allow-origin', req.headers.testheader, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.get(`http://localhost:${config.port}/`, { - headers: { - testheader: 'headerValue' + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Sec-WebSocket-Accept"', + fn: (req, res) => { + setHeaderFunction('Sec-WebSocket-Accept', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }).catch(done) + } + }) + + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Upgrade"', + fn: (req, res) => { + setHeaderFunction('Upgrade', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }).catch(done) + } + }) + + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability when the header is "Connection"', + fn: (req, res) => { + setHeaderFunction('Upgrade', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }).catch(done) + } + }) + + 
testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + 'when the header is "access-control-allow-origin" and the origin is a header', + fn: (req, res) => { + setHeaderFunction('access-control-allow-origin', req.headers.testheader, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.get(`http://localhost:${config.port}/`, { + headers: { + testheader: 'headerValue' + } + }).catch(done) + } + }) + + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability ' + + 'when the header is "access-control-allow-origin" and the origin is not a header', + fn: (req, res) => { + setHeaderFunction('access-control-allow-origin', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'https://www.datadoghq.com' + }, { + headers: { + testheader: 'headerValue' + } + }).catch(done) + } + }) + + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + 'when the header is "set-cookie" and the origin is a cookie', + fn: (req, res) => { + setHeaderFunction('set-cookie', req.cookies.cookie1, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.get(`http://localhost:${config.port}/`, { + headers: { + Cookie: 'cookie1=value' + } + }).catch(done) + } + }) + + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability when ' + + 'the header is "access-control-allow-origin" and the origin is not a header', + fn: (req, res) => { + setHeaderFunction('access-control-allow-origin', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'key=value' + }, { + headers: { + testheader: 'headerValue' + } + }).catch(done) + } + 
}) + + testThatRequestHasNoVulnerability({ + fn: (req, res) => { + setHeaderFunction('Access-Control-Allow-Origin', req.headers.origin, res) + setHeaderFunction('Access-Control-Allow-Headers', req.headers['access-control-request-headers'], res) + setHeaderFunction('Access-Control-Allow-Methods', req.headers['access-control-request-methods'], res) + }, + testDescription: 'Should not have vulnerability with CORS headers', + vulnerability: 'HEADER_INJECTION', + occurrencesAndLocation: { + occurrences: 1, + location: { + path: setHeaderFunctionFilename, + line: 4 } - }).catch(done) - } - }) + }, + cb: (headerInjectionVulnerabilities) => { + const evidenceString = headerInjectionVulnerabilities[0].evidence.valueParts + .map(part => part.value).join('') + expect(evidenceString).to.be.equal('custom: value') + }, + makeRequest: (done, config) => { + return axios.options(`http://localhost:${config.port}/`, { + headers: { + origin: 'http://custom-origin', + 'Access-Control-Request-Headers': 'TestHeader', + 'Access-Control-Request-Methods': 'GET' + } + }).catch(done) + } + }) - testThatRequestHasVulnerability({ - testDescription: 'should have HEADER_INJECTION vulnerability ' + - 'when the header is "access-control-allow-origin" and the origin is not a header', - fn: (req, res) => { - setHeaderFunction('access-control-allow-origin', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'https://www.datadoghq.com' - }, { - headers: { - testheader: 'headerValue' + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability when ' + + 'the header is "pragma" and the origin is not a header', + fn: (req, res) => { + setHeaderFunction('pragma', req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'key=value' + }, { + headers: { + 
testheader: 'headerValue' + } + }).catch(done) + } + }) + + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability when ' + + 'the header is "pragma" and the origin is not the cache-control header', + fn: (req, res) => { + setHeaderFunction('pragma', req.headers.testheader, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'key=value' + }, { + headers: { + testheader: 'headerValue' + } + }).catch(done) + } + }) + + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + 'when the header is "pragma" and the origin is a cache-control header', + fn: (req, res) => { + setHeaderFunction('pragma', req.headers['cache-control'], res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.get(`http://localhost:${config.port}/`, { + headers: { + 'Cache-Control': 'cachecontrolvalue' + } + }).catch(done) + } + }) + + ;['transfer-encoding', 'content-encoding'].forEach((headerName) => { + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability when ' + + `the header is "${headerName}" and the origin is not a header`, + fn: (req, res) => { + setHeaderFunction(headerName, req.body.test, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'key=value' + }, { + headers: { + testheader: 'headerValue' + } + }).catch(done) } - }).catch(done) - } - }) + }) - testThatRequestHasNoVulnerability({ - testDescription: 'should not have HEADER_INJECTION vulnerability ' + - 'when the header is "set-cookie" and the origin is a cookie', - fn: (req, res) => { - setHeaderFunction('set-cookie', req.cookies.cookie1, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return 
axios.get(`http://localhost:${config.port}/`, { - headers: { - Cookie: 'cookie1=value' + testThatRequestHasVulnerability({ + testDescription: 'should have HEADER_INJECTION vulnerability when ' + + `the header is "${headerName}" and the origin is not the accept-encoding header`, + fn: (req, res) => { + setHeaderFunction(headerName, req.headers.testheader, res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.post(`http://localhost:${config.port}/`, { + test: 'key=value' + }, { + headers: { + testheader: 'headerValue' + } + }).catch(done) } - }).catch(done) - } - }) + }) - testThatRequestHasVulnerability({ - testDescription: 'should have HEADER_INJECTION vulnerability when ' + - 'the header is "access-control-allow-origin" and the origin is not a header', - fn: (req, res) => { - setHeaderFunction('set-cookie', req.body.test, res) - }, - vulnerability: 'HEADER_INJECTION', - makeRequest: (done, config) => { - return axios.post(`http://localhost:${config.port}/`, { - test: 'key=value' - }, { - headers: { - testheader: 'headerValue' + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + `when the header is "${headerName}" and the origin is a accept-encoding header`, + fn: (req, res) => { + setHeaderFunction(headerName, req.headers['accept-encoding'], res) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.get(`http://localhost:${config.port}/`, { + headers: { + 'Accept-encoding': 'gzip, deflate' + } + }).catch(done) } - }).catch(done) - } - }) + }) - testThatRequestHasNoVulnerability({ - fn: (req, res) => { - setHeaderFunction('Access-Control-Allow-Origin', req.headers.origin, res) - setHeaderFunction('Access-Control-Allow-Headers', req.headers['access-control-request-headers'], res) - setHeaderFunction('Access-Control-Allow-Methods', req.headers['access-control-request-methods'], res) - }, - testDescription: 'Should not have 
vulnerability with CORS headers', - vulnerability: 'HEADER_INJECTION', - occurrencesAndLocation: { - occurrences: 1, - location: { - path: setHeaderFunctionFilename, - line: 4 - } - }, - cb: (headerInjectionVulnerabilities) => { - const evidenceString = headerInjectionVulnerabilities[0].evidence.valueParts - .map(part => part.value).join('') - expect(evidenceString).to.be.equal('custom: value') - }, - makeRequest: (done, config) => { - return axios.options(`http://localhost:${config.port}/`, { - headers: { - origin: 'http://custom-origin', - 'Access-Control-Request-Headers': 'TestHeader', - 'Access-Control-Request-Methods': 'GET' + testThatRequestHasNoVulnerability({ + testDescription: 'should not have HEADER_INJECTION vulnerability ' + + `when the header is "${headerName}" and the origin is a substring of accept-encoding header`, + fn: (req, res) => { + require(setHeaderFunctionsPath).reflectPartialAcceptEncodingHeader(req, res, headerName) + }, + vulnerability: 'HEADER_INJECTION', + makeRequest: (done, config) => { + return axios.get(`http://localhost:${config.port}/`, { + headers: { + 'Accept-encoding': 'gzip, deflate' + } + }).catch(done) } - }).catch(done) - } + }) + }) }) - }) + }) }) }) diff --git a/packages/dd-trace/test/appsec/iast/analyzers/resources/set-header-function.js b/packages/dd-trace/test/appsec/iast/analyzers/resources/set-header-function.js index f2e4e1d4ef2..1883e13bb16 100644 --- a/packages/dd-trace/test/appsec/iast/analyzers/resources/set-header-function.js +++ b/packages/dd-trace/test/appsec/iast/analyzers/resources/set-header-function.js @@ -4,4 +4,16 @@ function setHeader (name, value, res) { res.setHeader(name, value) } -module.exports = { setHeader } +function reflectPartialAcceptEncodingHeader (req, res, headerName) { + const substringAcceptEncodingValue = + req.headers['accept-encoding'].substring(0, req.headers['accept-encoding'].indexOf(',')) + res.setHeader( + headerName, + substringAcceptEncodingValue + ) +} + +module.exports = { 
+ reflectPartialAcceptEncodingHeader, + setHeader +} diff --git a/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.express.plugin.spec.js b/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.express.plugin.spec.js index f2a8193d1be..a9a995783f1 100644 --- a/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.express.plugin.spec.js +++ b/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.express.plugin.spec.js @@ -2,6 +2,7 @@ const { prepareTestServerForIastInExpress } = require('../utils') const axios = require('axios') +const { URL } = require('url') function noop () {} @@ -47,6 +48,95 @@ describe('Taint tracking plugin sources express tests', () => { childProcess.exec(req.headers['x-iast-test-command'], noop) }, 'COMMAND_INJECTION', 1, noop, makeRequestWithHeader) }) + + describe('url parse taint tracking', () => { + function makePostRequest (done) { + axios.post(`http://localhost:${config.port}/`, { + url: 'http://www.datadoghq.com/' + }).catch(done) + } + + testThatRequestHasVulnerability( + { + fn: (req) => { + // eslint-disable-next-line n/no-deprecated-api + const { parse } = require('url') + const url = parse(req.body.url) + + const childProcess = require('child_process') + childProcess.exec(url.host, noop) + }, + vulnerability: 'COMMAND_INJECTION', + occurrences: 1, + cb: noop, + makeRequest: makePostRequest, + testDescription: 'should detect vulnerability when tainted is coming from url.parse' + }) + + testThatRequestHasVulnerability( + { + fn: (req) => { + const { URL } = require('url') + const url = new URL(req.body.url) + + const childProcess = require('child_process') + childProcess.exec(url.host, noop) + }, + vulnerability: 'COMMAND_INJECTION', + occurrences: 1, + cb: noop, + makeRequest: makePostRequest, + testDescription: 'should detect vulnerability when tainted is coming from new url.URL input' + }) + + testThatRequestHasVulnerability( + { + fn: (req) => { + const { URL } = require('url') + const url = new URL('/path', 
req.body.url) + + const childProcess = require('child_process') + childProcess.exec(url.host, noop) + }, + vulnerability: 'COMMAND_INJECTION', + occurrences: 1, + cb: noop, + makeRequest: makePostRequest, + testDescription: 'should detect vulnerability when tainted is coming from new url.URL base' + }) + + if (URL.parse) { + testThatRequestHasVulnerability( + { + fn: (req) => { + const { URL } = require('url') + const url = URL.parse(req.body.url) + const childProcess = require('child_process') + childProcess.exec(url.host, noop) + }, + vulnerability: 'COMMAND_INJECTION', + occurrences: 1, + cb: noop, + makeRequest: makePostRequest, + testDescription: 'should detect vulnerability when tainted is coming from url.URL.parse input' + }) + + testThatRequestHasVulnerability( + { + fn: (req) => { + const { URL } = require('url') + const url = URL.parse('/path', req.body.url) + const childProcess = require('child_process') + childProcess.exec(url.host, noop) + }, + vulnerability: 'COMMAND_INJECTION', + occurrences: 1, + cb: noop, + makeRequest: makePostRequest, + testDescription: 'should detect vulnerability when tainted is coming from url.URL.parse base' + }) + } + }) } ) }) diff --git a/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.spec.js b/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.spec.js index 59b7c524aae..1a21b0a5b08 100644 --- a/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.spec.js +++ b/packages/dd-trace/test/appsec/iast/taint-tracking/plugin.spec.js @@ -42,13 +42,16 @@ describe('IAST Taint tracking plugin', () => { }) it('Should subscribe to body parser, qs, cookie and process_params channel', () => { - expect(taintTrackingPlugin._subscriptions).to.have.lengthOf(6) + expect(taintTrackingPlugin._subscriptions).to.have.lengthOf(9) expect(taintTrackingPlugin._subscriptions[0]._channel.name).to.equals('datadog:body-parser:read:finish') - 
expect(taintTrackingPlugin._subscriptions[1]._channel.name).to.equals('datadog:qs:parse:finish') - expect(taintTrackingPlugin._subscriptions[2]._channel.name).to.equals('apm:express:middleware:next') - expect(taintTrackingPlugin._subscriptions[3]._channel.name).to.equals('datadog:cookie:parse:finish') - expect(taintTrackingPlugin._subscriptions[4]._channel.name).to.equals('datadog:express:process_params:start') - expect(taintTrackingPlugin._subscriptions[5]._channel.name).to.equals('apm:graphql:resolve:start') + expect(taintTrackingPlugin._subscriptions[1]._channel.name).to.equals('datadog:multer:read:finish') + expect(taintTrackingPlugin._subscriptions[2]._channel.name).to.equals('datadog:qs:parse:finish') + expect(taintTrackingPlugin._subscriptions[3]._channel.name).to.equals('apm:express:middleware:next') + expect(taintTrackingPlugin._subscriptions[4]._channel.name).to.equals('datadog:cookie:parse:finish') + expect(taintTrackingPlugin._subscriptions[5]._channel.name).to.equals('datadog:express:process_params:start') + expect(taintTrackingPlugin._subscriptions[6]._channel.name).to.equals('apm:graphql:resolve:start') + expect(taintTrackingPlugin._subscriptions[7]._channel.name).to.equals('datadog:url:parse:finish') + expect(taintTrackingPlugin._subscriptions[8]._channel.name).to.equals('datadog:url:getter:finish') }) describe('taint sources', () => { diff --git a/packages/dd-trace/test/appsec/iast/utils.js b/packages/dd-trace/test/appsec/iast/utils.js index 2ef5a77ee30..6e427bcb629 100644 --- a/packages/dd-trace/test/appsec/iast/utils.js +++ b/packages/dd-trace/test/appsec/iast/utils.js @@ -136,6 +136,7 @@ function endResponse (res, appResult) { function checkNoVulnerabilityInRequest (vulnerability, config, done, makeRequest) { agent .use(traces => { + if (traces[0][0].type !== 'web') throw new Error('Not a web span') // iastJson == undefiend is valid const iastJson = traces[0][0].meta['_dd.iast.json'] || '' expect(iastJson).to.not.include(`"${vulnerability}"`) @@ 
-288,9 +289,10 @@ function prepareTestServerForIastInExpress (description, expressVersion, loadMid before((done) => { const express = require(`../../../../../versions/express@${expressVersion}`).get() const bodyParser = require('../../../../../versions/body-parser').get() + const expressApp = express() - if (loadMiddlewares) loadMiddlewares(expressApp) + if (loadMiddlewares) loadMiddlewares(expressApp, listener) expressApp.use(bodyParser.json()) try { diff --git a/packages/dd-trace/test/appsec/iast/vulnerability-reporter.spec.js b/packages/dd-trace/test/appsec/iast/vulnerability-reporter.spec.js index f498ef6e122..1f4516218af 100644 --- a/packages/dd-trace/test/appsec/iast/vulnerability-reporter.spec.js +++ b/packages/dd-trace/test/appsec/iast/vulnerability-reporter.spec.js @@ -2,7 +2,8 @@ const { addVulnerability, sendVulnerabilities, clearCache, start, stop } = require('../../../src/appsec/iast/vulnerability-reporter') const VulnerabilityAnalyzer = require('../../../../dd-trace/src/appsec/iast/analyzers/vulnerability-analyzer') const appsecStandalone = require('../../../src/appsec/standalone') -const { APPSEC_PROPAGATION_KEY } = require('../../../src/constants') +const { APPSEC_PROPAGATION_KEY, SAMPLING_MECHANISM_APPSEC } = require('../../../src/constants') +const { USER_KEEP } = require('../../../../../ext/priority') describe('vulnerability-reporter', () => { let vulnerabilityAnalyzer @@ -82,9 +83,14 @@ describe('vulnerability-reporter', () => { describe('without rootSpan', () => { let fakeTracer let onTheFlySpan + let prioritySampler beforeEach(() => { + prioritySampler = { + setPriority: sinon.stub() + } onTheFlySpan = { + _prioritySampler: prioritySampler, finish: sinon.spy(), addTags: sinon.spy(), context () { @@ -120,10 +126,11 @@ describe('vulnerability-reporter', () => { '_dd.iast.enabled': 1 }) expect(onTheFlySpan.addTags.secondCall).to.have.been.calledWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': 
'{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3410512655,' + '"evidence":{"value":"sha1"},"location":{"spanId":42,"path":"filename.js","line":73}}]}' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(onTheFlySpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) expect(onTheFlySpan.finish).to.have.been.calledOnce }) @@ -140,10 +147,15 @@ describe('vulnerability-reporter', () => { describe('sendVulnerabilities', () => { let span let context + let prioritySampler beforeEach(() => { context = { _trace: { tags: {} } } + prioritySampler = { + setPriority: sinon.stub() + } span = { + _prioritySampler: prioritySampler, addTags: sinon.stub(), context: sinon.stub().returns(context) } @@ -178,10 +190,10 @@ describe('vulnerability-reporter', () => { vulnerabilityAnalyzer._createVulnerability('INSECURE_HASHING', { value: 'sha1' }, 888)) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3254801297,' + '"evidence":{"value":"sha1"},"location":{"spanId":888}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send only valid vulnerabilities', () => { @@ -191,10 +203,10 @@ describe('vulnerability-reporter', () => { iastContext.vulnerabilities.push({ invalid: 'vulnerability' }) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3254801297,' + '"evidence":{"value":"sha1"},"location":{"spanId":888}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send vulnerabilities with evidence, ranges and sources', () => { @@ -239,7 +251,6 @@ 
describe('vulnerability-reporter', () => { sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[{"origin":"ORIGIN_TYPE_1","name":"PARAMETER_NAME_1","value":"joe"},' + '{"origin":"ORIGIN_TYPE_2","name":"PARAMETER_NAME_2","value":"joe@mail.com"}],' + '"vulnerabilities":[{"type":"SQL_INJECTION","hash":4676753086,' + @@ -249,6 +260,7 @@ describe('vulnerability-reporter', () => { '[{"value":"SELECT id FROM u WHERE email = \'"},{"value":"joe@mail.com","source":1},{"value":"\';"}]},' + '"location":{"spanId":888,"path":"filename.js","line":99}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send multiple vulnerabilities with same tainted source', () => { @@ -293,7 +305,6 @@ describe('vulnerability-reporter', () => { sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[{"origin":"ORIGIN_TYPE_1","name":"PARAMETER_NAME_1","value":"joe"}],' + '"vulnerabilities":[{"type":"SQL_INJECTION","hash":4676753086,' + '"evidence":{"valueParts":[{"value":"SELECT * FROM u WHERE name = \'"},{"value":"joe","source":0},' + @@ -302,6 +313,7 @@ describe('vulnerability-reporter', () => { '[{"value":"UPDATE u SET name=\'"},{"value":"joe","source":0},{"value":"\' WHERE id=1;"}]},' + '"location":{"spanId":888,"path":"filename.js","line":99}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send once with multiple vulnerabilities', () => { @@ -314,7 +326,6 @@ describe('vulnerability-reporter', () => { { path: '/path/to/file3.js', line: 3 })) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': 
'{"sources":[],"vulnerabilities":[' + '{"type":"INSECURE_HASHING","hash":1697980169,"evidence":{"value":"sha1"},' + '"location":{"spanId":888,"path":"/path/to/file1.js","line":1}},' + @@ -323,6 +334,7 @@ describe('vulnerability-reporter', () => { '{"type":"INSECURE_HASHING","hash":1755238473,"evidence":{"value":"md5"},' + '"location":{"spanId":-5,"path":"/path/to/file3.js","line":3}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send once vulnerability with one vulnerability', () => { @@ -332,10 +344,10 @@ describe('vulnerability-reporter', () => { { path: 'filename.js', line: 88 })) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3410512691,' + '"evidence":{"value":"sha1"},"location":{"spanId":888,"path":"filename.js","line":88}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should not send duplicated vulnerabilities', () => { @@ -348,10 +360,10 @@ describe('vulnerability-reporter', () => { { path: 'filename.js', line: 88 })) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3410512691,' + '"evidence":{"value":"sha1"},"location":{"spanId":888,"path":"filename.js","line":88}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should not send duplicated vulnerabilities in multiple sends', () => { @@ -365,10 +377,10 @@ describe('vulnerability-reporter', () => { sendVulnerabilities(iastContext.vulnerabilities, span) sendVulnerabilities(iastContext.vulnerabilities, span) 
expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3410512691,' + '"evidence":{"value":"sha1"},"location":{"spanId":888,"path":"filename.js","line":88}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should not deduplicate vulnerabilities if not enabled', () => { @@ -384,12 +396,12 @@ describe('vulnerability-reporter', () => { { value: 'sha1' }, 888, { path: 'filename.js', line: 88 })) sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3410512691,' + '"evidence":{"value":"sha1"},"location":{"spanId":888,"path":"filename.js","line":88}},' + '{"type":"INSECURE_HASHING","hash":3410512691,"evidence":{"value":"sha1"},"location":' + '{"spanId":888,"path":"filename.js","line":88}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should add _dd.p.appsec trace tag with standalone enabled', () => { @@ -401,11 +413,12 @@ describe('vulnerability-reporter', () => { sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3254801297,' + '"evidence":{"value":"sha1"},"location":{"spanId":999}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + expect(span.context()._trace.tags).to.have.property(APPSEC_PROPAGATION_KEY) }) @@ -418,11 +431,12 @@ describe('vulnerability-reporter', () => { sendVulnerabilities(iastContext.vulnerabilities, span) expect(span.addTags).to.have.been.calledOnceWithExactly({ - 
'manual.keep': 'true', '_dd.iast.json': '{"sources":[],"vulnerabilities":[{"type":"INSECURE_HASHING","hash":3254801297,' + '"evidence":{"value":"sha1"},"location":{"spanId":999}}]}' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + expect(span.context()._trace.tags).to.not.have.property(APPSEC_PROPAGATION_KEY) }) }) @@ -441,7 +455,8 @@ describe('vulnerability-reporter', () => { global.setInterval = sinon.spy(global.setInterval) global.clearInterval = sinon.spy(global.clearInterval) span = { - addTags: sinon.stub() + addTags: sinon.stub(), + keep: sinon.stub() } }) diff --git a/packages/dd-trace/test/appsec/rasp/command_injection.express.plugin.spec.js b/packages/dd-trace/test/appsec/rasp/command_injection.express.plugin.spec.js new file mode 100644 index 00000000000..3943bd0c3c3 --- /dev/null +++ b/packages/dd-trace/test/appsec/rasp/command_injection.express.plugin.spec.js @@ -0,0 +1,433 @@ +'use strict' + +const agent = require('../../plugins/agent') +const appsec = require('../../../src/appsec') +const Config = require('../../../src/config') +const path = require('path') +const Axios = require('axios') +const { getWebSpan, checkRaspExecutedAndHasThreat, checkRaspExecutedAndNotThreat } = require('./utils') +const { assert } = require('chai') + +describe('RASP - command_injection', () => { + withVersions('express', 'express', expressVersion => { + let app, server, axios + + async function testBlockingRequest () { + try { + await axios.get('/?dir=$(cat /etc/passwd 1>%262 ; echo .)') + } catch (e) { + if (!e.response) { + throw e + } + + return checkRaspExecutedAndHasThreat(agent, 'rasp-command_injection-rule-id-3') + } + + assert.fail('Request should be blocked') + } + + function checkRaspNotExecutedAndNotThreat (agent, checkRuleEval = true) { + return agent.use((traces) => { + const span = getWebSpan(traces) + + assert.notProperty(span.meta, '_dd.appsec.json') + 
assert.notProperty(span.meta_struct || {}, '_dd.stack') + if (checkRuleEval) { + assert.notProperty(span.metrics, '_dd.appsec.rasp.rule.eval') + } + }) + } + + function testBlockingAndSafeRequests () { + it('should block the threat', async () => { + await testBlockingRequest() + }) + + it('should not block safe request', async () => { + await axios.get('/?dir=.') + + return checkRaspExecutedAndNotThreat(agent) + }) + } + + function testSafeInNonShell () { + it('should not block the threat', async () => { + await axios.get('/?dir=$(cat /etc/passwd 1>%262 ; echo .)') + + return checkRaspNotExecutedAndNotThreat(agent) + }) + + it('should not block safe request', async () => { + await axios.get('/?dir=.') + + return checkRaspNotExecutedAndNotThreat(agent) + }) + } + + before(() => { + return agent.load(['express', 'http', 'child_process'], { client: false }) + }) + + before((done) => { + const express = require(`../../../../../versions/express@${expressVersion}`).get() + const expressApp = express() + + expressApp.get('/', (req, res) => { + app(req, res) + }) + + appsec.enable(new Config({ + appsec: { + enabled: true, + rules: path.join(__dirname, 'resources', 'rasp_rules.json'), + rasp: { enabled: true } + } + })) + + server = expressApp.listen(0, () => { + const port = server.address().port + axios = Axios.create({ + baseURL: `http://localhost:${port}` + }) + + done() + }) + }) + + after(() => { + appsec.disable() + server.close() + return agent.close({ ritmReset: false }) + }) + + describe('exec', () => { + describe('with callback', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + childProcess.exec(`ls ${req.query.dir}`, function (e) { + if (e?.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + + res.end('end') + }) + } + }) + + testBlockingAndSafeRequests() + }) + + describe('with promise', () => { + beforeEach(() => { + app = async (req, res) => { + const util = require('util') + const exec = 
util.promisify(require('child_process').exec) + + try { + await exec(`ls ${req.query.dir}`) + } catch (e) { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end('end') + } + }) + + testBlockingAndSafeRequests() + }) + + describe('with event emitter', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.exec(`ls ${req.query.dir}`) + child.on('error', (e) => { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + }) + + child.on('close', () => { + res.end() + }) + } + }) + + testBlockingAndSafeRequests() + }) + + describe('execSync', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + try { + childProcess.execSync(`ls ${req.query.dir}`) + } catch (e) { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end('end') + } + }) + + testBlockingAndSafeRequests() + }) + }) + + describe('execFile', () => { + describe('with shell: true', () => { + describe('with callback', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + childProcess.execFile('ls', [req.query.dir], { shell: true }, function (e) { + if (e?.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + + res.end('end') + }) + } + }) + + testBlockingAndSafeRequests() + }) + + describe('with promise', () => { + beforeEach(() => { + app = async (req, res) => { + const util = require('util') + const execFile = util.promisify(require('child_process').execFile) + + try { + await execFile('ls', [req.query.dir], { shell: true }) + } catch (e) { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end('end') + } + }) + + testBlockingAndSafeRequests() + }) + + describe('with event emitter', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = 
childProcess.execFile('ls', [req.query.dir], { shell: true }) + child.on('error', (e) => { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + }) + + child.on('close', () => { + res.end() + }) + } + }) + + testBlockingAndSafeRequests() + }) + + describe('execFileSync', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + try { + childProcess.execFileSync('ls', [req.query.dir], { shell: true }) + } catch (e) { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end() + } + }) + + testBlockingAndSafeRequests() + }) + }) + + describe('without shell', () => { + describe('with callback', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + childProcess.execFile('ls', [req.query.dir], function (e) { + if (e?.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + + res.end('end') + }) + } + }) + + testSafeInNonShell() + }) + + describe('with promise', () => { + beforeEach(() => { + app = async (req, res) => { + const util = require('util') + const execFile = util.promisify(require('child_process').execFile) + + try { + await execFile('ls', [req.query.dir]) + } catch (e) { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end('end') + } + }) + + testSafeInNonShell() + }) + + describe('with event emitter', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.execFile('ls', [req.query.dir]) + child.on('error', (e) => { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + }) + + child.on('close', () => { + res.end() + }) + } + }) + + testSafeInNonShell() + }) + + describe('execFileSync', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + try { + childProcess.execFileSync('ls', [req.query.dir]) + } catch (e) { + if (e.name === 
'DatadogRaspAbortError') { + res.writeHead(500) + } + } + + res.end() + } + }) + + testSafeInNonShell() + }) + }) + }) + + describe('spawn', () => { + describe('with shell: true', () => { + describe('with event emitter', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.spawn('ls', [req.query.dir], { shell: true }) + child.on('error', (e) => { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + }) + + child.on('close', () => { + res.end() + }) + } + }) + + testBlockingAndSafeRequests() + }) + + describe('spawnSync', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.spawnSync('ls', [req.query.dir], { shell: true }) + if (child.error?.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + + res.end() + } + }) + + testBlockingAndSafeRequests() + }) + }) + + describe('without shell', () => { + describe('with event emitter', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.spawn('ls', [req.query.dir]) + child.on('error', (e) => { + if (e.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + }) + + child.on('close', () => { + res.end() + }) + } + }) + + testSafeInNonShell() + }) + + describe('spawnSync', () => { + beforeEach(() => { + app = (req, res) => { + const childProcess = require('child_process') + + const child = childProcess.spawnSync('ls', [req.query.dir]) + if (child.error?.name === 'DatadogRaspAbortError') { + res.writeHead(500) + } + + res.end() + } + }) + + testSafeInNonShell() + }) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/appsec/rasp/command_injection.integration.spec.js b/packages/dd-trace/test/appsec/rasp/command_injection.integration.spec.js new file mode 100644 index 00000000000..c91c49b65df --- /dev/null +++ 
b/packages/dd-trace/test/appsec/rasp/command_injection.integration.spec.js @@ -0,0 +1,88 @@ +'use strict' + +const { createSandbox, FakeAgent, spawnProc } = require('../../../../../integration-tests/helpers') +const getPort = require('get-port') +const path = require('path') +const Axios = require('axios') +const { assert } = require('chai') + +describe('RASP - command_injection - integration', () => { + let axios, sandbox, cwd, appPort, appFile, agent, proc + + before(async function () { + this.timeout(60000) + + sandbox = await createSandbox( + ['express'], + false, + [path.join(__dirname, 'resources')] + ) + + appPort = await getPort() + cwd = sandbox.folder + appFile = path.join(cwd, 'resources', 'shi-app', 'index.js') + + axios = Axios.create({ + baseURL: `http://localhost:${appPort}` + }) + }) + + after(async function () { + this.timeout(60000) + await sandbox.remove() + }) + + beforeEach(async () => { + agent = await new FakeAgent().start() + proc = await spawnProc(appFile, { + cwd, + env: { + DD_TRACE_AGENT_PORT: agent.port, + DD_TRACE_DEBUG: 'true', + APP_PORT: appPort, + DD_APPSEC_ENABLED: 'true', + DD_APPSEC_RASP_ENABLED: 'true', + DD_APPSEC_RULES: path.join(cwd, 'resources', 'rasp_rules.json') + } + }) + }) + + afterEach(async () => { + proc.kill() + await agent.stop() + }) + + async function testRequestBlocked (url) { + try { + await axios.get(url) + } catch (e) { + if (!e.response) { + throw e + } + + assert.strictEqual(e.response.status, 403) + return await agent.assertMessageReceived(({ headers, payload }) => { + assert.property(payload[0][0].meta, '_dd.appsec.json') + assert.include(payload[0][0].meta['_dd.appsec.json'], '"rasp-command_injection-rule-id-3"') + }) + } + + throw new Error('Request should be blocked') + } + + it('should block using execFileSync and exception handled by express', async () => { + await testRequestBlocked('/shi/execFileSync?dir=$(cat /etc/passwd 1>%262 ; echo .)') + }) + + it('should block using execFileSync and 
unhandled exception', async () => { + await testRequestBlocked('/shi/execFileSync/out-of-express-scope?dir=$(cat /etc/passwd 1>%262 ; echo .)') + }) + + it('should block using execSync and exception handled by express', async () => { + await testRequestBlocked('/shi/execSync?dir=$(cat /etc/passwd 1>%262 ; echo .)') + }) + + it('should block using execSync and unhandled exception', async () => { + await testRequestBlocked('/shi/execSync/out-of-express-scope?dir=$(cat /etc/passwd 1>%262 ; echo .)') + }) +}) diff --git a/packages/dd-trace/test/appsec/rasp/command_injection.spec.js b/packages/dd-trace/test/appsec/rasp/command_injection.spec.js new file mode 100644 index 00000000000..785b155a113 --- /dev/null +++ b/packages/dd-trace/test/appsec/rasp/command_injection.spec.js @@ -0,0 +1,156 @@ +'use strict' + +const proxyquire = require('proxyquire') +const addresses = require('../../../src/appsec/addresses') +const { childProcessExecutionTracingChannel } = require('../../../src/appsec/channels') + +const { start } = childProcessExecutionTracingChannel + +describe('RASP - command_injection.js', () => { + let waf, datadogCore, commandInjection, utils, config + + beforeEach(() => { + datadogCore = { + storage: { + getStore: sinon.stub() + } + } + + waf = { + run: sinon.stub() + } + + utils = { + handleResult: sinon.stub() + } + + commandInjection = proxyquire('../../../src/appsec/rasp/command_injection', { + '../../../../datadog-core': datadogCore, + '../waf': waf, + './utils': utils + }) + + config = { + appsec: { + stackTrace: { + enabled: true, + maxStackTraces: 2, + maxDepth: 42 + } + } + } + + commandInjection.enable(config) + }) + + afterEach(() => { + sinon.restore() + commandInjection.disable() + }) + + describe('analyzeCommandInjection', () => { + it('should analyze command_injection without arguments', () => { + const ctx = { + file: 'cmd', + shell: true + } + const req = {} + datadogCore.storage.getStore.returns({ req }) + + start.publish(ctx) + + const 
persistent = { [addresses.SHELL_COMMAND]: 'cmd' } + sinon.assert.calledOnceWithExactly(waf.run, { persistent }, req, 'command_injection') + }) + + it('should analyze command_injection with arguments', () => { + const ctx = { + file: 'cmd', + fileArgs: ['arg0', 'arg1'], + shell: true + } + const req = {} + datadogCore.storage.getStore.returns({ req }) + + start.publish(ctx) + + const persistent = { [addresses.SHELL_COMMAND]: ['cmd', 'arg0', 'arg1'] } + sinon.assert.calledOnceWithExactly(waf.run, { persistent }, req, 'command_injection') + }) + + it('should not analyze command_injection when it is not shell', () => { + const ctx = { + file: 'cmd', + fileArgs: ['arg0', 'arg1'], + shell: false + } + const req = {} + datadogCore.storage.getStore.returns({ req }) + + start.publish(ctx) + + sinon.assert.notCalled(waf.run) + }) + + it('should not analyze command_injection if rasp is disabled', () => { + commandInjection.disable() + const ctx = { + file: 'cmd' + } + const req = {} + datadogCore.storage.getStore.returns({ req }) + + start.publish(ctx) + + sinon.assert.notCalled(waf.run) + }) + + it('should not analyze command_injection if no store', () => { + const ctx = { + file: 'cmd' + } + datadogCore.storage.getStore.returns(undefined) + + start.publish(ctx) + + sinon.assert.notCalled(waf.run) + }) + + it('should not analyze command_injection if no req', () => { + const ctx = { + file: 'cmd' + } + datadogCore.storage.getStore.returns({}) + + start.publish(ctx) + + sinon.assert.notCalled(waf.run) + }) + + it('should not analyze command_injection if no file', () => { + const ctx = { + fileArgs: ['arg0'] + } + const req = {} + datadogCore.storage.getStore.returns({ req }) + + start.publish(ctx) + + sinon.assert.notCalled(waf.run) + }) + + it('should call handleResult', () => { + const abortController = { abort: 'abort' } + const ctx = { file: 'cmd', abortController, shell: true } + const wafResult = { waf: 'waf' } + const req = { req: 'req' } + const res = { res: 'res' } + 
waf.run.returns(wafResult) + datadogCore.storage.getStore.returns({ req, res }) + + start.publish(ctx) + + sinon.assert.calledOnceWithExactly(utils.handleResult, wafResult, req, res, abortController, config) + }) + }) +}) diff --git a/packages/dd-trace/test/appsec/rasp/resources/rasp_rules.json b/packages/dd-trace/test/appsec/rasp/resources/rasp_rules.json index 778e4821e73..daca47d8d20 100644 --- a/packages/dd-trace/test/appsec/rasp/resources/rasp_rules.json +++ b/packages/dd-trace/test/appsec/rasp/resources/rasp_rules.json @@ -107,6 +107,55 @@ "block", "stack_trace" ] + }, + { + "id": "rasp-command_injection-rule-id-3", + "name": "Command injection exploit", + "tags": { + "type": "command_injection", + "category": "vulnerability_trigger", + "cwe": "77", + "capec": "1000/152/248/88", + "confidence": "0", + "module": "rasp" + }, + "conditions": [ + { + "parameters": { + "resource": [ + { + "address": "server.sys.shell.cmd" + } + ], + "params": [ + { + "address": "server.request.query" + }, + { + "address": "server.request.body" + }, + { + "address": "server.request.path_params" + }, + { + "address": "grpc.server.request.message" + }, + { + "address": "graphql.server.all_resolvers" + }, + { + "address": "graphql.server.resolver" + } + ] + }, + "operator": "shi_detector" + } + ], + "transformers": [], + "on_match": [ + "block", + "stack_trace" + ] } ] } diff --git a/packages/dd-trace/test/appsec/rasp/resources/shi-app/index.js b/packages/dd-trace/test/appsec/rasp/resources/shi-app/index.js new file mode 100644 index 00000000000..a6714bd2148 --- /dev/null +++ b/packages/dd-trace/test/appsec/rasp/resources/shi-app/index.js @@ -0,0 +1,44 @@ +'use strict' + +const tracer = require('dd-trace') +tracer.init({ + flushInterval: 1 +}) + +const express = require('express') +const childProcess = require('child_process') + +const app = express() +const port = process.env.APP_PORT || 3000 + +app.get('/shi/execFileSync', async (req, res) => { + childProcess.execFileSync('ls', 
[req.query.dir], { shell: true }) + + res.end('OK') +}) + +app.get('/shi/execFileSync/out-of-express-scope', async (req, res) => { + process.nextTick(() => { + childProcess.execFileSync('ls', [req.query.dir], { shell: true }) + + res.end('OK') + }) +}) + +app.get('/shi/execSync', async (req, res) => { + childProcess.execSync('ls', [req.query.dir]) + + res.end('OK') +}) + +app.get('/shi/execSync/out-of-express-scope', async (req, res) => { + process.nextTick(() => { + childProcess.execSync('ls', [req.query.dir]) + + res.end('OK') + }) +}) + +app.listen(port, () => { + process.send({ port }) +}) diff --git a/packages/dd-trace/test/appsec/remote_config/index.spec.js b/packages/dd-trace/test/appsec/remote_config/index.spec.js index dbd710d6a4e..b1804e0b646 100644 --- a/packages/dd-trace/test/appsec/remote_config/index.spec.js +++ b/packages/dd-trace/test/appsec/remote_config/index.spec.js @@ -298,6 +298,8 @@ describe('Remote Config index', () => { .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SQLI, true) expect(rc.updateCapabilities) .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_LFI, true) + expect(rc.updateCapabilities) + .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SHI, true) expect(rc.setProductHandler).to.have.been.calledWith('ASM_DATA') expect(rc.setProductHandler).to.have.been.calledWith('ASM_DD') @@ -340,6 +342,8 @@ describe('Remote Config index', () => { .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SQLI, true) expect(rc.updateCapabilities) .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_LFI, true) + expect(rc.updateCapabilities) + .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SHI, true) expect(rc.setProductHandler).to.have.been.calledWith('ASM_DATA') expect(rc.setProductHandler).to.have.been.calledWith('ASM_DD') @@ -384,6 +388,8 @@ describe('Remote Config index', () => { .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SQLI, 
true) expect(rc.updateCapabilities) .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_LFI, true) + expect(rc.updateCapabilities) + .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SHI, true) }) it('should not activate rasp capabilities if rasp is disabled', () => { @@ -423,6 +429,8 @@ describe('Remote Config index', () => { .to.not.have.been.calledWith(RemoteConfigCapabilities.ASM_RASP_SQLI) expect(rc.updateCapabilities) .to.not.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_LFI) + expect(rc.updateCapabilities) + .to.not.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SHI) }) }) @@ -462,6 +470,8 @@ describe('Remote Config index', () => { .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SQLI, false) expect(rc.updateCapabilities) .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_LFI, false) + expect(rc.updateCapabilities) + .to.have.been.calledWithExactly(RemoteConfigCapabilities.ASM_RASP_SHI, false) expect(rc.removeProductHandler).to.have.been.calledWith('ASM_DATA') expect(rc.removeProductHandler).to.have.been.calledWith('ASM_DD') diff --git a/packages/dd-trace/test/appsec/reporter.spec.js b/packages/dd-trace/test/appsec/reporter.spec.js index 0860b2c75ac..757884c3566 100644 --- a/packages/dd-trace/test/appsec/reporter.spec.js +++ b/packages/dd-trace/test/appsec/reporter.spec.js @@ -3,6 +3,8 @@ const proxyquire = require('proxyquire') const { storage } = require('../../../datadog-core') const zlib = require('zlib') +const { SAMPLING_MECHANISM_APPSEC } = require('../../src/constants') +const { USER_KEEP } = require('../../../../ext/priority') describe('reporter', () => { let Reporter @@ -10,14 +12,21 @@ describe('reporter', () => { let web let telemetry let sample + let prioritySampler beforeEach(() => { + prioritySampler = { + setPriority: sinon.stub() + } + span = { + _prioritySampler: prioritySampler, context: sinon.stub().returns({ _tags: {} }), addTags: sinon.stub(), - 
setTag: sinon.stub() + setTag: sinon.stub(), + keep: sinon.stub() } web = { @@ -105,7 +114,6 @@ describe('reporter', () => { expect(Reporter.metricsQueue.get('_dd.appsec.event_rules.error_count')).to.be.eq(1) expect(Reporter.metricsQueue.get('_dd.appsec.event_rules.errors')) .to.be.eq(JSON.stringify(diagnosticsRules.errors)) - expect(Reporter.metricsQueue.get('manual.keep')).to.be.eq('true') }) it('should call incrementWafInitMetric', () => { @@ -222,11 +230,11 @@ describe('reporter', () => { expect(span.addTags).to.have.been.calledOnceWithExactly({ 'appsec.event': 'true', - 'manual.keep': 'true', '_dd.origin': 'appsec', '_dd.appsec.json': '{"triggers":[{"rule":{},"rule_matches":[{}]}]}', 'network.client.ip': '8.8.8.8' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should not add manual.keep when rate limit is reached', (done) => { @@ -234,24 +242,23 @@ describe('reporter', () => { const params = {} expect(Reporter.reportAttack('', params)).to.not.be.false - expect(addTags.getCall(0).firstArg).to.have.property('manual.keep').that.equals('true') expect(Reporter.reportAttack('', params)).to.not.be.false - expect(addTags.getCall(1).firstArg).to.have.property('manual.keep').that.equals('true') expect(Reporter.reportAttack('', params)).to.not.be.false - expect(addTags.getCall(2).firstArg).to.have.property('manual.keep').that.equals('true') + + expect(prioritySampler.setPriority).to.have.callCount(3) Reporter.setRateLimit(1) expect(Reporter.reportAttack('', params)).to.not.be.false expect(addTags.getCall(3).firstArg).to.have.property('appsec.event').that.equals('true') - expect(addTags.getCall(3).firstArg).to.have.property('manual.keep').that.equals('true') + expect(prioritySampler.setPriority).to.have.callCount(4) expect(Reporter.reportAttack('', params)).to.not.be.false expect(addTags.getCall(4).firstArg).to.have.property('appsec.event').that.equals('true') - 
expect(addTags.getCall(4).firstArg).to.not.have.property('manual.keep') + expect(prioritySampler.setPriority).to.have.callCount(4) setTimeout(() => { expect(Reporter.reportAttack('', params)).to.not.be.false - expect(addTags.getCall(5).firstArg).to.have.property('manual.keep').that.equals('true') + expect(prioritySampler.setPriority).to.have.callCount(5) done() }, 1020) }) @@ -265,10 +272,10 @@ describe('reporter', () => { expect(span.addTags).to.have.been.calledOnceWithExactly({ 'appsec.event': 'true', - 'manual.keep': 'true', '_dd.appsec.json': '{"triggers":[]}', 'network.client.ip': '8.8.8.8' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should merge attacks json', () => { @@ -280,11 +287,11 @@ describe('reporter', () => { expect(span.addTags).to.have.been.calledOnceWithExactly({ 'appsec.event': 'true', - 'manual.keep': 'true', '_dd.origin': 'appsec', '_dd.appsec.json': '{"triggers":[{"rule":{},"rule_matches":[{}]},{"rule":{}},{"rule":{},"rule_matches":[{}]}]}', 'network.client.ip': '8.8.8.8' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call standalone sample', () => { @@ -296,12 +303,13 @@ describe('reporter', () => { expect(span.addTags).to.have.been.calledOnceWithExactly({ 'appsec.event': 'true', - 'manual.keep': 'true', '_dd.origin': 'appsec', '_dd.appsec.json': '{"triggers":[{"rule":{},"rule_matches":[{}]},{"rule":{}},{"rule":{},"rule_matches":[{}]}]}', 'network.client.ip': '8.8.8.8' }) + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + expect(sample).to.have.been.calledOnceWithExactly(span) }) }) @@ -642,5 +650,16 @@ describe('reporter', () => { expect(span.setTag).to.have.been.calledWithExactly('_dd.appsec.rasp.duration_ext', 321) expect(span.setTag).to.have.been.calledWithExactly('_dd.appsec.rasp.rule.eval', 3) }) + + 
it('should keep span if there are metrics', () => { + const req = {} + + Reporter.metricsQueue.set('a', 1) + Reporter.metricsQueue.set('b', 2) + + Reporter.finishRequest(req, wafContext, {}) + + expect(prioritySampler.setPriority).to.have.been.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + }) }) }) diff --git a/packages/dd-trace/test/appsec/sdk/track_event.spec.js b/packages/dd-trace/test/appsec/sdk/track_event.spec.js index e3739488b81..fca01030c03 100644 --- a/packages/dd-trace/test/appsec/sdk/track_event.spec.js +++ b/packages/dd-trace/test/appsec/sdk/track_event.spec.js @@ -5,6 +5,8 @@ const agent = require('../../plugins/agent') const axios = require('axios') const tracer = require('../../../../../index') const { LOGIN_SUCCESS, LOGIN_FAILURE } = require('../../../src/appsec/addresses') +const { SAMPLING_MECHANISM_APPSEC } = require('../../../src/constants') +const { USER_KEEP } = require('../../../../../ext/priority') describe('track_event', () => { describe('Internal API', () => { @@ -16,14 +18,21 @@ describe('track_event', () => { let trackUserLoginSuccessEvent, trackUserLoginFailureEvent, trackCustomEvent, trackEvent let sample let waf + let prioritySampler beforeEach(() => { log = { warn: sinon.stub() } + prioritySampler = { + setPriority: sinon.stub() + } + rootSpan = { - addTags: sinon.stub() + _prioritySampler: prioritySampler, + addTags: sinon.stub(), + keep: sinon.stub() } getRootSpan = sinon.stub().callsFake(() => rootSpan) @@ -96,12 +105,13 @@ describe('track_event', () => { expect(rootSpan.addTags).to.have.been.calledOnceWithExactly( { 'appsec.events.users.login.success.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.users.login.success.sdk': 'true', 'appsec.events.users.login.success.metakey1': 'metaValue1', 'appsec.events.users.login.success.metakey2': 'metaValue2', 'appsec.events.users.login.success.metakey3': 'metaValue3' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, 
USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call setUser and addTags without metadata', () => { @@ -113,9 +123,10 @@ describe('track_event', () => { expect(setUserTags).to.have.been.calledOnceWithExactly(user, rootSpan) expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.users.login.success.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.users.login.success.sdk': 'true' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call waf run with login success address', () => { @@ -161,7 +172,6 @@ describe('track_event', () => { expect(setUserTags).to.not.have.been.called expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.users.login.failure.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.users.login.failure.sdk': 'true', 'appsec.events.users.login.failure.usr.id': 'user_id', 'appsec.events.users.login.failure.usr.exists': 'true', @@ -169,6 +179,8 @@ describe('track_event', () => { 'appsec.events.users.login.failure.metakey2': 'metaValue2', 'appsec.events.users.login.failure.metakey3': 'metaValue3' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should send false `usr.exists` property when the user does not exist', () => { @@ -180,7 +192,6 @@ describe('track_event', () => { expect(setUserTags).to.not.have.been.called expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.users.login.failure.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.users.login.failure.sdk': 'true', 'appsec.events.users.login.failure.usr.id': 'user_id', 'appsec.events.users.login.failure.usr.exists': 'false', @@ -188,6 +199,8 @@ describe('track_event', () => { 'appsec.events.users.login.failure.metakey2': 'metaValue2', 'appsec.events.users.login.failure.metakey3': 'metaValue3' }) + 
expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call addTags without metadata', () => { @@ -197,11 +210,12 @@ describe('track_event', () => { expect(setUserTags).to.not.have.been.called expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.users.login.failure.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.users.login.failure.sdk': 'true', 'appsec.events.users.login.failure.usr.id': 'user_id', 'appsec.events.users.login.failure.usr.exists': 'true' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call waf run with login failure address', () => { @@ -241,11 +255,12 @@ describe('track_event', () => { expect(setUserTags).to.not.have.been.called expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.custom_event.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.custom_event.sdk': 'true', 'appsec.events.custom_event.metaKey1': 'metaValue1', 'appsec.events.custom_event.metakey2': 'metaValue2' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call addTags without metadata', () => { @@ -255,9 +270,10 @@ describe('track_event', () => { expect(setUserTags).to.not.have.been.called expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.custom_event.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.custom_event.sdk': 'true' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) }) @@ -266,31 +282,34 @@ describe('track_event', () => { trackEvent('event', { metaKey1: 'metaValue1', metakey2: 'metaValue2' }, 'trackEvent', rootSpan, 'safe') expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.event.track': 'true', - 
'manual.keep': 'true', '_dd.appsec.events.event.auto.mode': 'safe', 'appsec.events.event.metaKey1': 'metaValue1', 'appsec.events.event.metakey2': 'metaValue2' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call addTags with extended mode', () => { trackEvent('event', { metaKey1: 'metaValue1', metakey2: 'metaValue2' }, 'trackEvent', rootSpan, 'extended') expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ 'appsec.events.event.track': 'true', - 'manual.keep': 'true', '_dd.appsec.events.event.auto.mode': 'extended', 'appsec.events.event.metaKey1': 'metaValue1', 'appsec.events.event.metakey2': 'metaValue2' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) }) it('should call standalone sample', () => { trackEvent('event', undefined, 'trackEvent', rootSpan, undefined) expect(rootSpan.addTags).to.have.been.calledOnceWithExactly({ - 'appsec.events.event.track': 'true', - 'manual.keep': 'true' + 'appsec.events.event.track': 'true' }) + expect(prioritySampler.setPriority) + .to.have.been.calledOnceWithExactly(rootSpan, USER_KEEP, SAMPLING_MECHANISM_APPSEC) expect(sample).to.have.been.calledOnceWithExactly(rootSpan) }) }) @@ -339,7 +358,7 @@ describe('track_event', () => { expect(traces[0][0].meta).to.have.property('appsec.events.users.login.success.track', 'true') expect(traces[0][0].meta).to.have.property('usr.id', 'test_user_id') expect(traces[0][0].meta).to.have.property('appsec.events.users.login.success.metakey', 'metaValue') - expect(traces[0][0].meta).to.have.property('manual.keep', 'true') + expect(traces[0][0].metrics).to.have.property('_sampling_priority_v1', USER_KEEP) }).then(done).catch(done) axios.get(`http://localhost:${port}/`) }) @@ -377,7 +396,7 @@ describe('track_event', () => { expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.usr.id', 
'test_user_id') expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.usr.exists', 'true') expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.metakey', 'metaValue') - expect(traces[0][0].meta).to.have.property('manual.keep', 'true') + expect(traces[0][0].metrics).to.have.property('_sampling_priority_v1', USER_KEEP) }).then(done).catch(done) axios.get(`http://localhost:${port}/`) }) @@ -392,7 +411,7 @@ describe('track_event', () => { expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.usr.id', 'test_user_id') expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.usr.exists', 'false') expect(traces[0][0].meta).to.have.property('appsec.events.users.login.failure.metakey', 'metaValue') - expect(traces[0][0].meta).to.have.property('manual.keep', 'true') + expect(traces[0][0].metrics).to.have.property('_sampling_priority_v1', USER_KEEP) }).then(done).catch(done) axios.get(`http://localhost:${port}/`) }) @@ -428,7 +447,7 @@ describe('track_event', () => { agent.use(traces => { expect(traces[0][0].meta).to.have.property('appsec.events.my-custom-event.track', 'true') expect(traces[0][0].meta).to.have.property('appsec.events.my-custom-event.metakey', 'metaValue') - expect(traces[0][0].meta).to.have.property('manual.keep', 'true') + expect(traces[0][0].metrics).to.have.property('_sampling_priority_v1', USER_KEEP) }).then(done).catch(done) axios.get(`http://localhost:${port}/`) }) @@ -440,7 +459,7 @@ describe('track_event', () => { res.end() } agent.use(traces => { - expect(traces[0][0].meta).to.not.have.property('manual.keep', 'true') + expect(traces[0][0].metrics).to.not.have.property('_sampling_priority_v1', USER_KEEP) }).then(done).catch(done) axios.get(`http://localhost:${port}/`) }) diff --git a/packages/dd-trace/test/appsec/waf/index.spec.js b/packages/dd-trace/test/appsec/waf/index.spec.js index aff0a7e37a0..33c0bfbb3a3 100644 --- 
a/packages/dd-trace/test/appsec/waf/index.spec.js +++ b/packages/dd-trace/test/appsec/waf/index.spec.js @@ -81,7 +81,6 @@ describe('WAF Manager', () => { expect(Reporter.metricsQueue.set).to.been.calledWithExactly('_dd.appsec.event_rules.loaded', 1) expect(Reporter.metricsQueue.set).to.been.calledWithExactly('_dd.appsec.event_rules.error_count', 0) expect(Reporter.metricsQueue.set).not.to.been.calledWith('_dd.appsec.event_rules.errors') - expect(Reporter.metricsQueue.set).to.been.calledWithExactly('manual.keep', 'true') }) it('should set init metrics with errors', () => { @@ -104,7 +103,6 @@ describe('WAF Manager', () => { expect(Reporter.metricsQueue.set).to.been.calledWithExactly('_dd.appsec.event_rules.error_count', 2) expect(Reporter.metricsQueue.set).to.been.calledWithExactly('_dd.appsec.event_rules.errors', '{"error_1":["invalid_1"],"error_2":["invalid_2","invalid_3"]}') - expect(Reporter.metricsQueue.set).to.been.calledWithExactly('manual.keep', 'true') }) }) diff --git a/packages/dd-trace/test/azure_metadata.spec.js b/packages/dd-trace/test/azure_metadata.spec.js new file mode 100644 index 00000000000..7a8cb787d75 --- /dev/null +++ b/packages/dd-trace/test/azure_metadata.spec.js @@ -0,0 +1,109 @@ +'use strict' + +require('./setup/tap') + +const os = require('os') +const { getAzureAppMetadata, getAzureTagsFromMetadata } = require('../src/azure_metadata') + +describe('Azure metadata', () => { + describe('for apps is', () => { + it('not provided without DD_AZURE_APP_SERVICES', () => { + delete process.env.DD_AZURE_APP_SERVICES + expect(getAzureAppMetadata()).to.be.undefined + }) + + it('provided with DD_AZURE_APP_SERVICES', () => { + delete process.env.COMPUTERNAME // actually defined on Windows + process.env.DD_AZURE_APP_SERVICES = '1' + delete process.env.WEBSITE_SITE_NAME + expect(getAzureAppMetadata()).to.deep.equal({ operatingSystem: os.platform(), siteKind: 'app', siteType: 'app' }) + }) + }) + + it('provided completely with minimum vars', () => { + 
delete process.env.WEBSITE_RESOURCE_GROUP + delete process.env.WEBSITE_OS + delete process.env.FUNCTIONS_EXTENSION_VERSION + delete process.env.FUNCTIONS_WORKER_RUNTIME + delete process.env.FUNCTIONS_WORKER_RUNTIME_VERSION + process.env.COMPUTERNAME = 'boaty_mcboatface' + process.env.DD_AZURE_APP_SERVICES = '1' + process.env.WEBSITE_SITE_NAME = 'website_name' + process.env.WEBSITE_OWNER_NAME = 'subscription_id+resource_group-regionwebspace' + process.env.WEBSITE_INSTANCE_ID = 'instance_id' + process.env.DD_AAS_DOTNET_EXTENSION_VERSION = '1.0' + const expected = { + extensionVersion: '1.0', + instanceID: 'instance_id', + instanceName: 'boaty_mcboatface', + operatingSystem: os.platform(), + resourceGroup: 'resource_group', + resourceID: + '/subscriptions/subscription_id/resourcegroups/resource_group/providers/microsoft.web/sites/website_name', + siteKind: 'app', + siteName: 'website_name', + siteType: 'app', + subscriptionID: 'subscription_id' + } + expect(getAzureAppMetadata()).to.deep.equal(expected) + }) + + it('provided completely with complete vars', () => { + process.env.COMPUTERNAME = 'boaty_mcboatface' + process.env.DD_AZURE_APP_SERVICES = '1' + process.env.WEBSITE_SITE_NAME = 'website_name' + process.env.WEBSITE_RESOURCE_GROUP = 'resource_group' + process.env.WEBSITE_OWNER_NAME = 'subscription_id+foo-regionwebspace' + process.env.WEBSITE_OS = 'windows' + process.env.WEBSITE_INSTANCE_ID = 'instance_id' + process.env.FUNCTIONS_EXTENSION_VERSION = '20' + process.env.FUNCTIONS_WORKER_RUNTIME = 'node' + process.env.FUNCTIONS_WORKER_RUNTIME_VERSION = '14' + process.env.DD_AAS_DOTNET_EXTENSION_VERSION = '1.0' + const expected = { + extensionVersion: '1.0', + functionRuntimeVersion: '20', + instanceID: 'instance_id', + instanceName: 'boaty_mcboatface', + operatingSystem: 'windows', + resourceGroup: 'resource_group', + resourceID: + '/subscriptions/subscription_id/resourcegroups/resource_group/providers/microsoft.web/sites/website_name', + runtime: 'node', + 
runtimeVersion: '14', + siteKind: 'functionapp', + siteName: 'website_name', + siteType: 'function', + subscriptionID: 'subscription_id' + } + expect(getAzureAppMetadata()).to.deep.equal(expected) + }) + + it('tags are correctly generated from vars', () => { + delete process.env.WEBSITE_RESOURCE_GROUP + delete process.env.WEBSITE_OS + delete process.env.FUNCTIONS_EXTENSION_VERSION + delete process.env.FUNCTIONS_WORKER_RUNTIME + delete process.env.FUNCTIONS_WORKER_RUNTIME_VERSION + process.env.COMPUTERNAME = 'boaty_mcboatface' + process.env.DD_AZURE_APP_SERVICES = '1' + process.env.WEBSITE_SITE_NAME = 'website_name' + process.env.WEBSITE_OWNER_NAME = 'subscription_id+resource_group-regionwebspace' + process.env.WEBSITE_INSTANCE_ID = 'instance_id' + process.env.DD_AAS_DOTNET_EXTENSION_VERSION = '1.0' + const expected = { + 'aas.environment.extension_version': '1.0', + 'aas.environment.instance_id': 'instance_id', + 'aas.environment.instance_name': 'boaty_mcboatface', + 'aas.environment.os': os.platform(), + 'aas.resource.group': 'resource_group', + 'aas.resource.id': + '/subscriptions/subscription_id/resourcegroups/resource_group/providers/microsoft.web/sites/website_name', + 'aas.site.kind': 'app', + 'aas.site.name': 'website_name', + 'aas.site.type': 'app', + 'aas.subscription.id': 'subscription_id' + } + expect(getAzureTagsFromMetadata(getAzureAppMetadata())).to.deep.equal(expected) + }) +}) diff --git a/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/dynamic-instrumentation.spec.js b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/dynamic-instrumentation.spec.js new file mode 100644 index 00000000000..b07ce40533f --- /dev/null +++ b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/dynamic-instrumentation.spec.js @@ -0,0 +1,47 @@ +'use strict' + +require('../../../../dd-trace/test/setup/tap') + +const { fork } = require('child_process') +const path = require('path') + +const { assert } = require('chai') + +describe('test 
visibility with dynamic instrumentation', () => { + // Dynamic Instrumentation - Test Visibility not currently supported for windows + if (process.platform === 'win32') { + return + } + let childProcess + + afterEach(() => { + if (childProcess) { + childProcess.kill() + } + }) + + it('can grab local variables', (done) => { + childProcess = fork(path.join(__dirname, 'target-app', 'test-visibility-dynamic-instrumentation-script.js')) + + childProcess.on('message', ({ snapshot: { language, stack, probe, captures }, snapshotId }) => { + assert.exists(snapshotId) + assert.exists(probe) + assert.exists(stack) + assert.equal(language, 'javascript') + + assert.deepEqual(captures, { + lines: { + 9: { + locals: { + a: { type: 'number', value: '1' }, + b: { type: 'number', value: '2' }, + localVar: { type: 'number', value: '1' } + } + } + } + }) + + done() + }) + }) +}) diff --git a/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/di-dependency.js b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/di-dependency.js new file mode 100644 index 00000000000..6d2144d2ed8 --- /dev/null +++ b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/di-dependency.js @@ -0,0 +1,10 @@ +'use strict' + +module.exports = function (a, b) { + // eslint-disable-next-line no-console + const localVar = 1 + if (a > 10) { + throw new Error('a is too big') + } + return a + b + localVar // location of the breakpoint +} diff --git a/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/test-visibility-dynamic-instrumentation-script.js b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/test-visibility-dynamic-instrumentation-script.js new file mode 100644 index 00000000000..fedfaefdc6c --- /dev/null +++ b/packages/dd-trace/test/ci-visibility/dynamic-instrumentation/target-app/test-visibility-dynamic-instrumentation-script.js @@ -0,0 +1,29 @@ +'use strict' + +const path = require('path') +const 
tvDynamicInstrumentation = require('../../../../src/ci-visibility/dynamic-instrumentation') +const sum = require('./di-dependency') + +// keep process alive +const intervalId = setInterval(() => {}, 5000) + +tvDynamicInstrumentation.start() + +tvDynamicInstrumentation.isReady().then(() => { + const [ + snapshotId, + breakpointSetPromise, + breakpointHitPromise + ] = tvDynamicInstrumentation.addLineProbe({ file: path.join(__dirname, 'di-dependency.js'), line: 9 }) + + breakpointHitPromise.then(({ snapshot }) => { + // once the breakpoint is hit, we can grab the snapshot and send it to the parent process + process.send({ snapshot, snapshotId }) + clearInterval(intervalId) + }) + + // We run the code once the breakpoint is set + breakpointSetPromise.then(() => { + sum(1, 2) + }) +}) diff --git a/packages/dd-trace/test/ci-visibility/exporters/agent-proxy/agent-proxy.spec.js b/packages/dd-trace/test/ci-visibility/exporters/agent-proxy/agent-proxy.spec.js index 4ff8f12ace6..1abae9e82f1 100644 --- a/packages/dd-trace/test/ci-visibility/exporters/agent-proxy/agent-proxy.spec.js +++ b/packages/dd-trace/test/ci-visibility/exporters/agent-proxy/agent-proxy.spec.js @@ -6,6 +6,7 @@ const nock = require('nock') const AgentProxyCiVisibilityExporter = require('../../../../src/ci-visibility/exporters/agent-proxy') const AgentlessWriter = require('../../../../src/ci-visibility/exporters/agentless/writer') +const DynamicInstrumentationLogsWriter = require('../../../../src/ci-visibility/exporters/agentless/di-logs-writer') const CoverageWriter = require('../../../../src/ci-visibility/exporters/agentless/coverage-writer') const AgentWriter = require('../../../../src/exporters/agent/writer') @@ -68,7 +69,10 @@ describe('AgentProxyCiVisibilityExporter', () => { .get('/info') .delay(queryDelay) .reply(200, JSON.stringify({ - endpoints: ['/evp_proxy/v2/'] + endpoints: [ + '/evp_proxy/v2/', + '/debugger/v1/input' + ] })) }) @@ -112,6 +116,35 @@ describe('AgentProxyCiVisibilityExporter', () 
=> { agentProxyCiVisibilityExporter.exportCoverage(coverage) expect(mockWriter.append).to.have.been.calledWith({ spanId: '1', traceId: '1', files: [] }) }) + + context('if isTestDynamicInstrumentationEnabled is set', () => { + it('should initialise DynamicInstrumentationLogsWriter', async () => { + const agentProxyCiVisibilityExporter = new AgentProxyCiVisibilityExporter({ + port, + tags, + isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + expect(agentProxyCiVisibilityExporter._logsWriter).to.be.instanceOf(DynamicInstrumentationLogsWriter) + }) + + it('should process logs', async () => { + const mockWriter = { + append: sinon.spy(), + flush: sinon.spy() + } + const agentProxyCiVisibilityExporter = new AgentProxyCiVisibilityExporter({ + port, + tags, + isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + agentProxyCiVisibilityExporter._logsWriter = mockWriter + const log = { message: 'hello' } + agentProxyCiVisibilityExporter.exportDiLogs({}, log) + expect(mockWriter.append).to.have.been.calledWith(sinon.match(log)) + }) + }) }) describe('agent is not evp compatible', () => { @@ -166,6 +199,35 @@ describe('AgentProxyCiVisibilityExporter', () => { }) expect(mockWriter.append).not.to.have.been.called }) + + context('if isTestDynamicInstrumentationEnabled is set', () => { + it('should not initialise DynamicInstrumentationLogsWriter', async () => { + const agentProxyCiVisibilityExporter = new AgentProxyCiVisibilityExporter({ + port, + tags, + isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + expect(agentProxyCiVisibilityExporter._logsWriter).to.be.undefined + }) + + it('should not process logs', async () => { + const mockWriter = { + append: sinon.spy(), + flush: sinon.spy() + } + const agentProxyCiVisibilityExporter = new AgentProxyCiVisibilityExporter({ + port, + tags, + 
isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + agentProxyCiVisibilityExporter._logsWriter = mockWriter + const log = { message: 'hello' } + agentProxyCiVisibilityExporter.exportDiLogs({}, log) + expect(mockWriter.append).not.to.have.been.called + }) + }) }) describe('export', () => { diff --git a/packages/dd-trace/test/ci-visibility/exporters/agentless/di-logs-writer.spec.js b/packages/dd-trace/test/ci-visibility/exporters/agentless/di-logs-writer.spec.js new file mode 100644 index 00000000000..85a674a0d85 --- /dev/null +++ b/packages/dd-trace/test/ci-visibility/exporters/agentless/di-logs-writer.spec.js @@ -0,0 +1,105 @@ +'use strict' + +require('../../../../../dd-trace/test/setup/tap') + +const { expect } = require('chai') +const sinon = require('sinon') +const nock = require('nock') +const DynamicInstrumentationLogsWriter = require('../../../../src/ci-visibility/exporters/agentless/di-logs-writer') +const log = require('../../../../src/log') + +describe('Test Visibility DI Writer', () => { + beforeEach(() => { + nock.cleanAll() + process.env.DD_API_KEY = '1' + }) + + afterEach(() => { + delete process.env.DD_API_KEY + sinon.restore() + }) + + context('agentless', () => { + it('can send logs to the logs intake', (done) => { + const scope = nock('http://www.example.com') + .post('/api/v2/logs', body => { + expect(body).to.deep.equal([{ message: 'test' }, { message: 'test2' }]) + return true + }) + .reply(202) + + const logsWriter = new DynamicInstrumentationLogsWriter({ url: 'http://www.example.com' }) + + logsWriter.append({ message: 'test' }) + logsWriter.append({ message: 'test2' }) + + logsWriter.flush(() => { + scope.done() + done() + }) + }) + + it('logs an error if the request fails', (done) => { + const logErrorSpy = sinon.spy(log, 'error') + + const scope = nock('http://www.example.com') + .post('/api/v2/logs') + .reply(500) + + const logsWriter = new DynamicInstrumentationLogsWriter({ 
url: 'http://www.example.com' }) + + logsWriter.append({ message: 'test5' }) + logsWriter.append({ message: 'test6' }) + + logsWriter.flush(() => { + expect(logErrorSpy.called).to.be.true + scope.done() + done() + }) + }) + }) + + context('agent based', () => { + it('can send logs to the debugger endpoint in the agent', (done) => { + delete process.env.DD_API_KEY + + const scope = nock('http://www.example.com') + .post('/debugger/v1/input', body => { + expect(body).to.deep.equal([{ message: 'test3' }, { message: 'test4' }]) + return true + }) + .reply(202) + + const logsWriter = new DynamicInstrumentationLogsWriter({ url: 'http://www.example.com', isAgentProxy: true }) + + logsWriter.append({ message: 'test3' }) + logsWriter.append({ message: 'test4' }) + + logsWriter.flush(() => { + scope.done() + done() + }) + }) + + it('logs an error if the request fails', (done) => { + delete process.env.DD_API_KEY + + const logErrorSpy = sinon.spy(log, 'error') + + const scope = nock('http://www.example.com') + .post('/debugger/v1/input') + .reply(500) + + const logsWriter = new DynamicInstrumentationLogsWriter({ url: 'http://www.example.com', isAgentProxy: true }) + + logsWriter.append({ message: 'test5' }) + logsWriter.append({ message: 'test6' }) + + logsWriter.flush(() => { + expect(logErrorSpy.called).to.be.true + scope.done() + done() + }) + }) + }) +}) diff --git a/packages/dd-trace/test/ci-visibility/exporters/agentless/exporter.spec.js b/packages/dd-trace/test/ci-visibility/exporters/agentless/exporter.spec.js index 11b3bf1ec4c..dd229984bd2 100644 --- a/packages/dd-trace/test/ci-visibility/exporters/agentless/exporter.spec.js +++ b/packages/dd-trace/test/ci-visibility/exporters/agentless/exporter.spec.js @@ -8,6 +8,7 @@ const { expect } = require('chai') const nock = require('nock') const AgentlessCiVisibilityExporter = require('../../../../src/ci-visibility/exporters/agentless') +const DynamicInstrumentationLogsWriter = 
require('../../../../src/ci-visibility/exporters/agentless/di-logs-writer') describe('CI Visibility Agentless Exporter', () => { const url = new URL('http://www.example.com') @@ -177,6 +178,33 @@ describe('CI Visibility Agentless Exporter', () => { }) }) + context('if isTestDynamicInstrumentationEnabled is set', () => { + it('should initialise DynamicInstrumentationLogsWriter', async () => { + const agentProxyCiVisibilityExporter = new AgentlessCiVisibilityExporter({ + tags: {}, + isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + expect(agentProxyCiVisibilityExporter._logsWriter).to.be.instanceOf(DynamicInstrumentationLogsWriter) + }) + + it('should process logs', async () => { + const mockWriter = { + append: sinon.spy(), + flush: sinon.spy() + } + const agentProxyCiVisibilityExporter = new AgentlessCiVisibilityExporter({ + tags: {}, + isTestDynamicInstrumentationEnabled: true + }) + await agentProxyCiVisibilityExporter._canUseCiVisProtocolPromise + agentProxyCiVisibilityExporter._logsWriter = mockWriter + const log = { message: 'hello' } + agentProxyCiVisibilityExporter.exportDiLogs({}, log) + expect(mockWriter.append).to.have.been.calledWith(sinon.match(log)) + }) + }) + describe('url', () => { it('sets the default if URL param is not specified', () => { const site = 'd4tad0g.com' diff --git a/packages/dd-trace/test/ci-visibility/exporters/ci-visibility-exporter.spec.js b/packages/dd-trace/test/ci-visibility/exporters/ci-visibility-exporter.spec.js index b92d5b3ae98..7b09f8fba2d 100644 --- a/packages/dd-trace/test/ci-visibility/exporters/ci-visibility-exporter.spec.js +++ b/packages/dd-trace/test/ci-visibility/exporters/ci-visibility-exporter.spec.js @@ -815,4 +815,97 @@ describe('CI Visibility Exporter', () => { }) }) }) + + describe('exportDiLogs', () => { + context('is not initialized', () => { + it('should do nothing', () => { + const log = { message: 'log' } + const ciVisibilityExporter = new 
CiVisibilityExporter({ port, isTestDynamicInstrumentationEnabled: true }) + ciVisibilityExporter.exportDiLogs(log) + ciVisibilityExporter._export = sinon.spy() + expect(ciVisibilityExporter._export).not.to.be.called + }) + }) + + context('is initialized but can not forward logs', () => { + it('should do nothing', () => { + const writer = { + append: sinon.spy(), + flush: sinon.spy(), + setUrl: sinon.spy() + } + const log = { message: 'log' } + const ciVisibilityExporter = new CiVisibilityExporter({ port, isTestDynamicInstrumentationEnabled: true }) + ciVisibilityExporter._isInitialized = true + ciVisibilityExporter._logsWriter = writer + ciVisibilityExporter._canForwardLogs = false + ciVisibilityExporter.exportDiLogs(log) + expect(ciVisibilityExporter._logsWriter.append).not.to.be.called + }) + }) + + context('is initialized and can forward logs', () => { + it('should export formatted logs', () => { + const writer = { + append: sinon.spy(), + flush: sinon.spy(), + setUrl: sinon.spy() + } + const diLog = { + message: 'log', + debugger: { + snapshot: { + id: '1234', + timestamp: 1234567890, + probe: { + id: '54321', + version: '1', + location: { + file: 'example.js', + lines: ['1'] + } + }, + stack: [ + { + fileName: 'example.js', + function: 'sum', + lineNumber: 1 + } + ], + language: 'javascript' + } + } + } + const ciVisibilityExporter = new CiVisibilityExporter({ + env: 'ci', + version: '1.0.0', + port, + isTestDynamicInstrumentationEnabled: true, + service: 'my-service' + }) + ciVisibilityExporter._isInitialized = true + ciVisibilityExporter._logsWriter = writer + ciVisibilityExporter._canForwardLogs = true + ciVisibilityExporter.exportDiLogs( + { + 'git.repository_url': 'https://github.com/datadog/dd-trace-js.git', + 'git.commit.sha': '1234' + }, + diLog + ) + expect(ciVisibilityExporter._logsWriter.append).to.be.calledWith(sinon.match({ + ddtags: 'git.repository_url:https://github.com/datadog/dd-trace-js.git,git.commit.sha:1234', + level: 'error', + ddsource: 
'dd_debugger', + service: 'my-service', + dd: { + service: 'my-service', + env: 'ci', + version: '1.0.0' + }, + ...diLog + })) + }) + }) + }) }) diff --git a/packages/dd-trace/test/config.spec.js b/packages/dd-trace/test/config.spec.js index 4246167725d..fa2734b206e 100644 --- a/packages/dd-trace/test/config.spec.js +++ b/packages/dd-trace/test/config.spec.js @@ -232,8 +232,8 @@ describe('Config', () => { expect(config).to.have.property('spanRemoveIntegrationFromService', false) expect(config).to.have.property('instrumentation_config_id', undefined) expect(config).to.have.deep.property('serviceMapping', {}) - expect(config).to.have.nested.deep.property('tracePropagationStyle.inject', ['datadog', 'tracecontext']) - expect(config).to.have.nested.deep.property('tracePropagationStyle.extract', ['datadog', 'tracecontext']) + expect(config).to.have.nested.deep.property('tracePropagationStyle.inject', ['datadog', 'tracecontext', 'baggage']) + expect(config).to.have.nested.deep.property('tracePropagationStyle.extract', ['datadog', 'tracecontext', 'baggage']) expect(config).to.have.nested.property('experimental.runtimeId', false) expect(config).to.have.nested.property('experimental.exporter', undefined) expect(config).to.have.nested.property('experimental.enableGetRumData', false) @@ -266,6 +266,9 @@ describe('Config', () => { expect(config).to.have.nested.property('installSignature.id', null) expect(config).to.have.nested.property('installSignature.time', null) expect(config).to.have.nested.property('installSignature.type', null) + expect(config).to.have.nested.property('llmobs.mlApp', undefined) + expect(config).to.have.nested.property('llmobs.agentlessEnabled', false) + expect(config).to.have.nested.property('llmobs.enabled', false) expect(updateConfig).to.be.calledOnce @@ -330,7 +333,11 @@ describe('Config', () => { { name: 'isGitUploadEnabled', value: false, origin: 'default' }, { name: 'isIntelligentTestRunnerEnabled', value: false, origin: 'default' }, { name: 
'isManualApiEnabled', value: false, origin: 'default' }, + { name: 'llmobs.agentlessEnabled', value: false, origin: 'default' }, + { name: 'llmobs.mlApp', value: undefined, origin: 'default' }, { name: 'ciVisibilityTestSessionName', value: '', origin: 'default' }, + { name: 'ciVisAgentlessLogSubmissionEnabled', value: false, origin: 'default' }, + { name: 'isTestDynamicInstrumentationEnabled', value: false, origin: 'default' }, { name: 'logInjection', value: false, origin: 'default' }, { name: 'lookup', value: undefined, origin: 'default' }, { name: 'openAiLogsEnabled', value: false, origin: 'default' }, @@ -500,6 +507,8 @@ describe('Config', () => { process.env.DD_INSTRUMENTATION_INSTALL_TYPE = 'k8s_single_step' process.env.DD_INSTRUMENTATION_INSTALL_TIME = '1703188212' process.env.DD_INSTRUMENTATION_CONFIG_ID = 'abcdef123' + process.env.DD_LLMOBS_AGENTLESS_ENABLED = 'true' + process.env.DD_LLMOBS_ML_APP = 'myMlApp' process.env.DD_TRACE_ENABLED = 'true' process.env.DD_GRPC_CLIENT_ERROR_STATUSES = '3,13,400-403' process.env.DD_GRPC_SERVER_ERROR_STATUSES = '3,13,400-403' @@ -602,6 +611,8 @@ describe('Config', () => { type: 'k8s_single_step', time: '1703188212' }) + expect(config).to.have.nested.property('llmobs.mlApp', 'myMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', true) expect(updateConfig).to.be.calledOnce @@ -667,7 +678,9 @@ describe('Config', () => { { name: 'traceId128BitGenerationEnabled', value: true, origin: 'env_var' }, { name: 'traceId128BitLoggingEnabled', value: true, origin: 'env_var' }, { name: 'tracing', value: false, origin: 'env_var' }, - { name: 'version', value: '1.0.0', origin: 'env_var' } + { name: 'version', value: '1.0.0', origin: 'env_var' }, + { name: 'llmobs.mlApp', value: 'myMlApp', origin: 'env_var' }, + { name: 'llmobs.agentlessEnabled', value: true, origin: 'env_var' } ]) }) @@ -817,7 +830,12 @@ describe('Config', () => { pollInterval: 42 }, traceId128BitGenerationEnabled: true, - 
traceId128BitLoggingEnabled: true + traceId128BitLoggingEnabled: true, + llmobs: { + mlApp: 'myMlApp', + agentlessEnabled: true, + apiKey: 'myApiKey' + } }) expect(config).to.have.property('protocolVersion', '0.5') @@ -892,6 +910,8 @@ describe('Config', () => { a: 'aa', b: 'bb' }) + expect(config).to.have.nested.property('llmobs.mlApp', 'myMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', true) expect(updateConfig).to.be.calledOnce @@ -939,7 +959,9 @@ describe('Config', () => { { name: 'stats.enabled', value: false, origin: 'calculated' }, { name: 'traceId128BitGenerationEnabled', value: true, origin: 'code' }, { name: 'traceId128BitLoggingEnabled', value: true, origin: 'code' }, - { name: 'version', value: '0.1.0', origin: 'code' } + { name: 'version', value: '0.1.0', origin: 'code' }, + { name: 'llmobs.mlApp', value: 'myMlApp', origin: 'code' }, + { name: 'llmobs.agentlessEnabled', value: true, origin: 'code' } ]) }) @@ -1140,6 +1162,8 @@ describe('Config', () => { process.env.DD_IAST_REDACTION_VALUE_PATTERN = 'value_pattern_to_be_overriden_by_options' process.env.DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED = 'true' process.env.DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED = 'true' + process.env.DD_LLMOBS_ML_APP = 'myMlApp' + process.env.DD_LLMOBS_AGENTLESS_ENABLED = 'true' const config = new Config({ protocolVersion: '0.5', @@ -1221,7 +1245,11 @@ describe('Config', () => { enabled: false }, traceId128BitGenerationEnabled: false, - traceId128BitLoggingEnabled: false + traceId128BitLoggingEnabled: false, + llmobs: { + mlApp: 'myOtherMlApp', + agentlessEnabled: false + } }) expect(config).to.have.property('protocolVersion', '0.5') @@ -1282,6 +1310,8 @@ describe('Config', () => { expect(config).to.have.nested.property('iast.redactionEnabled', true) expect(config).to.have.nested.property('iast.redactionNamePattern', 'REDACTION_NAME_PATTERN') expect(config).to.have.nested.property('iast.redactionValuePattern', 'REDACTION_VALUE_PATTERN') + 
expect(config).to.have.nested.property('llmobs.mlApp', 'myOtherMlApp') + expect(config).to.have.nested.property('llmobs.agentlessEnabled', false) }) it('should give priority to non-experimental options', () => { @@ -1875,6 +1905,8 @@ describe('Config', () => { delete process.env.DD_CIVISIBILITY_FLAKY_RETRY_COUNT delete process.env.DD_TEST_SESSION_NAME delete process.env.JEST_WORKER_ID + delete process.env.DD_TEST_DYNAMIC_INSTRUMENTATION_ENABLED + delete process.env.DD_AGENTLESS_LOG_SUBMISSION_ENABLED options = {} }) context('ci visibility mode is enabled', () => { @@ -1963,6 +1995,24 @@ describe('Config', () => { const config = new Config(options) expect(config).to.have.property('ciVisibilityTestSessionName', 'my-test-session') }) + it('should not enable agentless log submission by default', () => { + const config = new Config(options) + expect(config).to.have.property('ciVisAgentlessLogSubmissionEnabled', false) + }) + it('should enable agentless log submission if DD_AGENTLESS_LOG_SUBMISSION_ENABLED is true', () => { + process.env.DD_AGENTLESS_LOG_SUBMISSION_ENABLED = 'true' + const config = new Config(options) + expect(config).to.have.property('ciVisAgentlessLogSubmissionEnabled', true) + }) + it('should not set isTestDynamicInstrumentationEnabled by default', () => { + const config = new Config(options) + expect(config).to.have.property('isTestDynamicInstrumentationEnabled', false) + }) + it('should set isTestDynamicInstrumentationEnabled if DD_TEST_DYNAMIC_INSTRUMENTATION_ENABLED is passed', () => { + process.env.DD_TEST_DYNAMIC_INSTRUMENTATION_ENABLED = 'true' + const config = new Config(options) + expect(config).to.have.property('isTestDynamicInstrumentationEnabled', true) + }) }) context('ci visibility mode is not enabled', () => { it('should not activate intelligent test runner or git metadata upload', () => { @@ -2054,6 +2104,61 @@ describe('Config', () => { }) }) + context('llmobs config', () => { + it('should disable llmobs by default', () => { + const 
config = new Config() + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'default' + }) + }) + + it('should enable llmobs if DD_LLMOBS_ENABLED is set to true', () => { + process.env.DD_LLMOBS_ENABLED = 'true' + const config = new Config() + expect(config.llmobs.enabled).to.be.true + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: true, origin: 'env_var' + }) + }) + + it('should disable llmobs if DD_LLMOBS_ENABLED is set to false', () => { + process.env.DD_LLMOBS_ENABLED = 'false' + const config = new Config() + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'env_var' + }) + }) + + it('should enable llmobs with options and DD_LLMOBS_ENABLED is not set', () => { + const config = new Config({ llmobs: {} }) + expect(config.llmobs.enabled).to.be.true + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: true, origin: 'code' + }) + }) + + it('should have DD_LLMOBS_ENABLED take priority over options', () => { + process.env.DD_LLMOBS_ENABLED = 'false' + const config = new Config({ llmobs: {} }) + expect(config.llmobs.enabled).to.be.false + + // check origin computation + expect(updateConfig.getCall(0).args[0]).to.deep.include({ + name: 'llmobs.enabled', value: false, origin: 'env_var' + }) + }) + }) + it('should sanitize values for API Security sampling between 0 and 1', () => { expect(new Config({ appsec: { diff --git a/packages/dd-trace/test/datastreams/data_streams_checkpointer.spec.js b/packages/dd-trace/test/datastreams/data_streams_checkpointer.spec.js index ba33d4c8bdf..db29f96b575 100644 --- 
a/packages/dd-trace/test/datastreams/data_streams_checkpointer.spec.js +++ b/packages/dd-trace/test/datastreams/data_streams_checkpointer.spec.js @@ -2,8 +2,8 @@ require('../setup/tap') const agent = require('../plugins/agent') -const expectedProducerHash = '13182885521735152072' -const expectedConsumerHash = '5980058680018671020' +const expectedProducerHash = '11369286567396183453' +const expectedConsumerHash = '11204511019589278729' const DSM_CONTEXT_HEADER = 'dd-pathway-ctx-base64' describe('data streams checkpointer manual api', () => { diff --git a/packages/dd-trace/test/debugger/devtools_client/snapshot/complex-types.spec.js b/packages/dd-trace/test/debugger/devtools_client/snapshot/complex-types.spec.js index 22036e4c60a..0e46a2faba0 100644 --- a/packages/dd-trace/test/debugger/devtools_client/snapshot/complex-types.spec.js +++ b/packages/dd-trace/test/debugger/devtools_client/snapshot/complex-types.spec.js @@ -23,7 +23,7 @@ describe('debugger -> devtools client -> snapshot.getLocalStateForCallFrame', fu session.once('Debugger.paused', async ({ params }) => { expect(params.hitBreakpoints.length).to.eq(1) - resolve((await getLocalStateForCallFrame(params.callFrames[0]))()) + resolve((await getLocalStateForCallFrame(params.callFrames[0], { maxFieldCount: Number.MAX_SAFE_INTEGER }))()) }) await setAndTriggerBreakpoint(target, 10) diff --git a/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count-scopes.spec.js b/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count-scopes.spec.js new file mode 100644 index 00000000000..1f3fb8c14c6 --- /dev/null +++ b/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count-scopes.spec.js @@ -0,0 +1,32 @@ +'use strict' + +require('../../../setup/mocha') + +const { getTargetCodePath, enable, teardown, assertOnBreakpoint, setAndTriggerBreakpoint } = require('./utils') + +const target = getTargetCodePath(__filename) + +describe('debugger -> devtools client -> 
snapshot.getLocalStateForCallFrame', function () { + describe('maxFieldCount', function () { + beforeEach(enable(__filename)) + + afterEach(teardown) + + describe('should respect maxFieldCount on each collected scope', function () { + const maxFieldCount = 3 + let state + + beforeEach(function (done) { + assertOnBreakpoint(done, { maxFieldCount }, (_state) => { + state = _state + }) + setAndTriggerBreakpoint(target, 11) + }) + + it('should capture expected snapshot', function () { + // Expect the snapshot to have captured the first 3 fields from each scope + expect(state).to.have.keys(['a1', 'b1', 'c1', 'a2', 'b2', 'c2']) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count.spec.js b/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count.spec.js new file mode 100644 index 00000000000..a9507151209 --- /dev/null +++ b/packages/dd-trace/test/debugger/devtools_client/snapshot/max-field-count.spec.js @@ -0,0 +1,49 @@ +'use strict' + +require('../../../setup/mocha') + +const { getTargetCodePath, enable, teardown, assertOnBreakpoint, setAndTriggerBreakpoint } = require('./utils') + +const DEFAULT_MAX_FIELD_COUNT = 20 +const target = getTargetCodePath(__filename) + +describe('debugger -> devtools client -> snapshot.getLocalStateForCallFrame', function () { + describe('maxFieldCount', function () { + beforeEach(enable(__filename)) + + afterEach(teardown) + + describe('should respect the default maxFieldCount if not set', generateTestCases()) + + describe('should respect maxFieldCount if set to 10', generateTestCases({ maxFieldCount: 10 })) + }) +}) + +function generateTestCases (config) { + const maxFieldCount = config?.maxFieldCount ?? 
DEFAULT_MAX_FIELD_COUNT + let state + + const expectedFields = {} + for (let i = 1; i <= maxFieldCount; i++) { + expectedFields[`field${i}`] = { type: 'number', value: i.toString() } + } + + return function () { + beforeEach(function (done) { + assertOnBreakpoint(done, config, (_state) => { + state = _state + }) + setAndTriggerBreakpoint(target, 11) + }) + + it('should capture expected snapshot', function () { + expect(state).to.have.keys(['obj']) + expect(state).to.have.deep.property('obj', { + type: 'Object', + fields: expectedFields, + notCapturedReason: 'fieldCount', + size: 40 + }) + }) + } +} diff --git a/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count-scopes.js b/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count-scopes.js new file mode 100644 index 00000000000..90b317b8104 --- /dev/null +++ b/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count-scopes.js @@ -0,0 +1,15 @@ +'use strict' + +function run () { + // local scope + const { a1, b1, c1, d1 } = {} + + { + // block scope + const { a2, b2, c2, d2 } = {} + + return { a1, b1, c1, d1, a2, b2, c2, d2 } // breakpoint at this line + } +} + +module.exports = { run } diff --git a/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count.js b/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count.js new file mode 100644 index 00000000000..ea8eb955079 --- /dev/null +++ b/packages/dd-trace/test/debugger/devtools_client/snapshot/target-code/max-field-count.js @@ -0,0 +1,14 @@ +'use strict' + +function run () { + const obj = {} + + // 40 is larger than the default maxFieldCount of 20 + for (let i = 1; i <= 40; i++) { + obj[`field${i}`] = i + } + + return 'my return value' // breakpoint at this line +} + +module.exports = { run } diff --git a/packages/dd-trace/test/llmobs/index.spec.js b/packages/dd-trace/test/llmobs/index.spec.js new file mode 100644 index 
00000000000..cdceeab64ab --- /dev/null +++ b/packages/dd-trace/test/llmobs/index.spec.js @@ -0,0 +1,137 @@ +'use strict' + +const proxyquire = require('proxyquire') + +const { channel } = require('dc-polyfill') +const spanProcessCh = channel('dd-trace:span:process') +const evalMetricAppendCh = channel('llmobs:eval-metric:append') +const flushCh = channel('llmobs:writers:flush') +const injectCh = channel('dd-trace:span:inject') + +const LLMObsEvalMetricsWriter = require('../../src/llmobs/writers/evaluations') + +const config = { + llmobs: { + mlApp: 'test' + } +} + +describe('module', () => { + let llmobsModule + let store + let logger + + let LLMObsAgentlessSpanWriter + let LLMObsAgentProxySpanWriter + + before(() => { + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'append') + }) + + beforeEach(() => { + store = {} + logger = { debug: sinon.stub() } + + LLMObsAgentlessSpanWriter = sinon.stub().returns({ + destroy: sinon.stub() + }) + LLMObsAgentProxySpanWriter = sinon.stub().returns({ + destroy: sinon.stub() + }) + + llmobsModule = proxyquire('../../../dd-trace/src/llmobs', { + '../log': logger, + './writers/spans/agentless': LLMObsAgentlessSpanWriter, + './writers/spans/agentProxy': LLMObsAgentProxySpanWriter, + './storage': { + storage: { + getStore () { + return store + } + } + } + }) + + process.removeAllListeners('beforeExit') + }) + + afterEach(() => { + LLMObsAgentProxySpanWriter.resetHistory() + LLMObsAgentlessSpanWriter.resetHistory() + LLMObsEvalMetricsWriter.prototype.append.resetHistory() + llmobsModule.disable() + }) + + after(() => { + LLMObsEvalMetricsWriter.prototype.append.restore() + sinon.restore() + + // get rid of mock stubs for writers + delete require.cache[require.resolve('../../../dd-trace/src/llmobs')] + }) + + describe('handle llmobs info injection', () => { + it('injects LLMObs parent ID when there is a parent LLMObs span', () => { + llmobsModule.enable(config) + store.span = { + context () { + return { + toSpanId () { + return 
'parent-id' + } + } + } + } + + const carrier = { + 'x-datadog-tags': '' + } + injectCh.publish({ carrier }) + + expect(carrier['x-datadog-tags']).to.equal(',_dd.p.llmobs_parent_id=parent-id') + }) + + it('does not inject LLMObs parent ID when there is no parent LLMObs span', () => { + llmobsModule.enable(config) + + const carrier = { + 'x-datadog-tags': '' + } + injectCh.publish({ carrier }) + expect(carrier['x-datadog-tags']).to.equal('') + }) + }) + + it('uses the agent proxy span writer', () => { + llmobsModule.enable(config) + expect(LLMObsAgentProxySpanWriter).to.have.been.called + }) + + it('uses the agentless span writer', () => { + config.llmobs.agentlessEnabled = true + llmobsModule.enable(config) + expect(LLMObsAgentlessSpanWriter).to.have.been.called + delete config.llmobs.agentlessEnabled + }) + + it('appends to the eval metric writer', () => { + llmobsModule.enable(config) + + const payload = {} + + evalMetricAppendCh.publish(payload) + + expect(LLMObsEvalMetricsWriter.prototype.append).to.have.been.calledWith(payload) + }) + + it('removes all subscribers when disabling', () => { + llmobsModule.enable(config) + + llmobsModule.disable() + + expect(injectCh.hasSubscribers).to.be.false + expect(evalMetricAppendCh.hasSubscribers).to.be.false + expect(spanProcessCh.hasSubscribers).to.be.false + expect(flushCh.hasSubscribers).to.be.false + }) +}) diff --git a/packages/dd-trace/test/llmobs/noop.spec.js b/packages/dd-trace/test/llmobs/noop.spec.js new file mode 100644 index 00000000000..36dd2279390 --- /dev/null +++ b/packages/dd-trace/test/llmobs/noop.spec.js @@ -0,0 +1,58 @@ +'use strict' + +describe('noop', () => { + let tracer + let llmobs + + before(() => { + tracer = new (require('../../../dd-trace/src/noop/proxy'))() + llmobs = tracer.llmobs + }) + + const nonTracingOps = ['enable', 'disable', 'annotate', 'exportSpan', 'submitEvaluation', 'flush'] + for (const op of nonTracingOps) { + it(`using "${op}" should not throw`, () => { + llmobs[op]() + }) + } 
+ + describe('trace', () => { + it('should not throw with just a span', () => { + const res = llmobs.trace({}, (span) => { + expect(() => span.setTag('foo', 'bar')).does.not.throw + return 1 + }) + + expect(res).to.equal(1) + }) + + it('should not throw with a span and a callback', async () => { + const prom = llmobs.trace({}, (span, cb) => { + expect(() => span.setTag('foo', 'bar')).does.not.throw + expect(() => cb()).does.not.throw + return Promise.resolve(5) + }) + + expect(await prom).to.equal(5) + }) + }) + + describe('wrap', () => { + it('should not throw with just a span', () => { + function fn () { + return 1 + } + + const wrapped = llmobs.wrap({}, fn) + expect(wrapped()).to.equal(1) + }) + + it('should not throw with a span and a callback', async () => { + function fn () { + return Promise.resolve(5) + } + const wrapped = llmobs.wrap({}, fn) + expect(await wrapped()).to.equal(5) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv3.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv3.spec.js new file mode 100644 index 00000000000..e78fa298b8c --- /dev/null +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv3.spec.js @@ -0,0 +1,382 @@ +'use strict' + +const agent = require('../../../plugins/agent') +const Sampler = require('../../../../src/sampler') +const { DogStatsDClient } = require('../../../../src/dogstatsd') +const { NoopExternalLogger } = require('../../../../src/external-logger/src') + +const nock = require('nock') +const { expectedLLMObsLLMSpanEvent, deepEqualWithMockValues } = require('../../util') +const chai = require('chai') +const semver = require('semver') +const LLMObsAgentProxySpanWriter = require('../../../../src/llmobs/writers/spans/agentProxy') + +const { expect } = chai + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +const satisfiesChatCompletion = version => semver.intersects('>=3.2.0', version) + +describe('integrations', () => { + let openai + + 
describe('openai', () => { + before(() => { + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'append') + + // reduce errors related to too many listeners + process.removeAllListeners('beforeExit') + + sinon.stub(DogStatsDClient.prototype, '_add') + sinon.stub(NoopExternalLogger.prototype, 'log') + sinon.stub(Sampler.prototype, 'isSampled').returns(true) + + LLMObsAgentProxySpanWriter.prototype.append.reset() + + return agent.load('openai', {}, { + llmobs: { + mlApp: 'test' + } + }) + }) + + afterEach(() => { + nock.cleanAll() + LLMObsAgentProxySpanWriter.prototype.append.reset() + }) + + after(() => { + require('../../../../../dd-trace').llmobs.disable() // unsubscribe from all events + sinon.restore() + return agent.close({ ritmReset: false, wipe: true }) + }) + + withVersions('openai', 'openai', '<4', version => { + const moduleRequirePath = `../../../../../../versions/openai@${version}` + + beforeEach(() => { + const requiredModule = require(moduleRequirePath) + const module = requiredModule.get() + + const { Configuration, OpenAIApi } = module + + const configuration = new Configuration({ + apiKey: 'sk-DATADOG-ACCEPTANCE-TESTS' + }) + + openai = new OpenAIApi(configuration) + }) + + it('submits a completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/completions') + .reply(200, { + model: 'text-davinci-002', + choices: [{ + text: 'I am doing well, how about you?', + index: 0, + logprobs: null, + finish_reason: 'length' + }], + usage: { prompt_tokens: 3, completion_tokens: 16, total_tokens: 19 } + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createCompletion', + inputMessages: [ + { content: 'How are you?' } + ], + outputMessages: [ + { content: 'I am doing well, how about you?' 
} + ], + tokenMetrics: { input_tokens: 3, output_tokens: 16, total_tokens: 19 }, + modelName: 'text-davinci-002', + modelProvider: 'openai', + metadata: {}, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.createCompletion({ + model: 'text-davinci-002', + prompt: 'How are you?' + }) + + await checkSpan + }) + + if (satisfiesChatCompletion(version)) { + it('submits a chat completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, { + id: 'chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN', + object: 'chat.completion', + created: 1684188020, + model: 'gpt-3.5-turbo-0301', + usage: { + prompt_tokens: 37, + completion_tokens: 10, + total_tokens: 47 + }, + choices: [{ + message: { + role: 'assistant', + content: 'I am doing well, how about you?' + }, + finish_reason: 'length', + index: 0 + }] + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + inputMessages: [ + { role: 'system', content: 'You are a helpful assistant' }, + { role: 'user', content: 'How are you?' } + ], + outputMessages: [ + { role: 'assistant', content: 'I am doing well, how about you?' } + ], + tokenMetrics: { input_tokens: 37, output_tokens: 10, total_tokens: 47 }, + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + metadata: {}, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.createChatCompletion({ + model: 'gpt-3.5-turbo-0301', + messages: [ + { role: 'system', content: 'You are a helpful assistant' }, + { role: 'user', content: 'How are you?' 
} + ] + }) + + await checkSpan + }) + } + + it('submits an embedding span', async () => { + nock('https://api.openai.com:443') + .post('/v1/embeddings') + .reply(200, { + object: 'list', + data: [{ + object: 'embedding', + index: 0, + embedding: [-0.0034387498, -0.026400521] + }], + model: 'text-embedding-ada-002-v2', + usage: { + prompt_tokens: 2, + total_tokens: 2 + } + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'embedding', + name: 'openai.createEmbedding', + inputDocuments: [ + { text: 'Hello, world!' } + ], + outputValue: '[1 embedding(s) returned with size 2]', + tokenMetrics: { input_tokens: 2, total_tokens: 2 }, + modelName: 'text-embedding-ada-002-v2', + modelProvider: 'openai', + metadata: { encoding_format: 'float' }, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.createEmbedding({ + model: 'text-embedding-ada-002-v2', + input: 'Hello, world!' 
+ }) + + await checkSpan + }) + + if (satisfiesChatCompletion(version)) { + it('submits a chat completion span with functions', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, { + id: 'chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN', + object: 'chat.completion', + created: 1684188020, + model: 'gpt-3.5-turbo-0301', + usage: { + prompt_tokens: 37, + completion_tokens: 10, + total_tokens: 47 + }, + choices: [{ + message: { + role: 'assistant', + content: 'THOUGHT: I will use the "extract_fictional_info" tool', + function_call: { + name: 'extract_fictional_info', + arguments: '{"name":"SpongeBob","origin":"Bikini Bottom"}' + } + }, + finish_reason: 'function_call', + index: 0 + }] + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + inputMessages: [{ role: 'user', content: 'What is SpongeBob SquarePants\'s origin?' }], + outputMessages: [{ + role: 'assistant', + content: 'THOUGHT: I will use the "extract_fictional_info" tool', + tool_calls: [ + { + name: 'extract_fictional_info', + arguments: { + name: 'SpongeBob', + origin: 'Bikini Bottom' + } + } + ] + }], + metadata: { function_call: 'auto' }, + tags: { ml_app: 'test', language: 'javascript' }, + tokenMetrics: { input_tokens: 37, output_tokens: 10, total_tokens: 47 } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.createChatCompletion({ + model: 'gpt-3.5-turbo-0301', + messages: [{ role: 'user', content: 'What is SpongeBob SquarePants\'s origin?' 
}], + functions: [{ type: 'function', functiin: { /* this doesn't matter */} }], + function_call: 'auto' + }) + + await checkSpan + }) + } + + it('submits a completion span with an error', async () => { + nock('https://api.openai.com:443') + .post('/v1/completions') + .reply(400, {}) + + let error + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createCompletion', + inputMessages: [{ content: 'Hello' }], + outputMessages: [{ content: '' }], + modelName: 'gpt-3.5-turbo', + modelProvider: 'openai', + metadata: { max_tokens: 50 }, + tags: { ml_app: 'test', language: 'javascript' }, + error, + errorType: error.type || error.name, + errorMessage: error.message, + errorStack: error.stack + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + try { + await openai.createCompletion({ + model: 'gpt-3.5-turbo', + prompt: 'Hello', + max_tokens: 50 + }) + } catch (e) { + error = e + } + + await checkSpan + }) + + if (satisfiesChatCompletion(version)) { + it('submits a chat completion span with an error', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(400, {}) + + let error + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + inputMessages: [{ role: 'user', content: 'Hello' }], + outputMessages: [{ content: '' }], + modelName: 'gpt-3.5-turbo', + modelProvider: 'openai', + metadata: { max_tokens: 50 }, + tags: { ml_app: 'test', language: 'javascript' }, + error, + errorType: error.type || error.name, + errorMessage: error.message, + errorStack: error.stack + }) + + 
expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + try { + await openai.createChatCompletion({ + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Hello' }], + max_tokens: 50 + }) + } catch (e) { + error = e + } + + await checkSpan + }) + } + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js new file mode 100644 index 00000000000..0d4e369525f --- /dev/null +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -0,0 +1,554 @@ +'use strict' + +const fs = require('fs') +const Path = require('path') +const agent = require('../../../plugins/agent') +const Sampler = require('../../../../src/sampler') +const { DogStatsDClient } = require('../../../../src/dogstatsd') +const { NoopExternalLogger } = require('../../../../src/external-logger/src') + +const nock = require('nock') +const { expectedLLMObsLLMSpanEvent, deepEqualWithMockValues } = require('../../util') +const chai = require('chai') +const semver = require('semver') +const LLMObsAgentProxySpanWriter = require('../../../../src/llmobs/writers/spans/agentProxy') + +const { expect } = chai + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +const baseOpenAITestsPath = '../../../../../datadog-plugin-openai/test/' + +const satisfiesTools = version => semver.intersects('>4.16.0', version) +const satisfiesStream = version => semver.intersects('>4.1.0', version) + +describe('integrations', () => { + let openai + + describe('openai', () => { + before(() => { + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'append') + + // reduce errors related to too many listeners + process.removeAllListeners('beforeExit') + + sinon.stub(DogStatsDClient.prototype, '_add') + sinon.stub(NoopExternalLogger.prototype, 'log') + sinon.stub(Sampler.prototype, 'isSampled').returns(true) + + LLMObsAgentProxySpanWriter.prototype.append.reset() + + return agent.load('openai', 
{}, { + llmobs: { + mlApp: 'test' + } + }) + }) + + afterEach(() => { + nock.cleanAll() + LLMObsAgentProxySpanWriter.prototype.append.reset() + }) + + after(() => { + sinon.restore() + require('../../../../../dd-trace').llmobs.disable() // unsubscribe from all events + // delete require.cache[require.resolve('../../../../dd-trace')] + return agent.close({ ritmReset: false, wipe: true }) + }) + + withVersions('openai', 'openai', '>=4', version => { + const moduleRequirePath = `../../../../../../versions/openai@${version}` + + beforeEach(() => { + const requiredModule = require(moduleRequirePath) + const module = requiredModule.get() + + const OpenAI = module + + openai = new OpenAI({ + apiKey: 'test' + }) + }) + + it('submits a completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/completions') + .reply(200, { + model: 'text-davinci-002', + choices: [{ + text: 'I am doing well, how about you?', + index: 0, + logprobs: null, + finish_reason: 'length' + }], + usage: { prompt_tokens: 3, completion_tokens: 16, total_tokens: 19 } + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createCompletion', + inputMessages: [ + { content: 'How are you?' } + ], + outputMessages: [ + { content: 'I am doing well, how about you?' } + ], + tokenMetrics: { input_tokens: 3, output_tokens: 16, total_tokens: 19 }, + modelName: 'text-davinci-002', + modelProvider: 'openai', + metadata: {}, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.completions.create({ + model: 'text-davinci-002', + prompt: 'How are you?' 
+ }) + + await checkSpan + }) + + it('submits a chat completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, { + id: 'chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN', + object: 'chat.completion', + created: 1684188020, + model: 'gpt-3.5-turbo-0301', + usage: { + prompt_tokens: 37, + completion_tokens: 10, + total_tokens: 47 + }, + choices: [{ + message: { + role: 'assistant', + content: 'I am doing well, how about you?' + }, + finish_reason: 'length', + index: 0 + }] + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + inputMessages: [ + { role: 'system', content: 'You are a helpful assistant' }, + { role: 'user', content: 'How are you?' } + ], + outputMessages: [ + { role: 'assistant', content: 'I am doing well, how about you?' } + ], + tokenMetrics: { input_tokens: 37, output_tokens: 10, total_tokens: 47 }, + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + metadata: {}, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-0301', + messages: [ + { role: 'system', content: 'You are a helpful assistant' }, + { role: 'user', content: 'How are you?' 
} + ] + }) + + await checkSpan + }) + + it('submits an embedding span', async () => { + nock('https://api.openai.com:443') + .post('/v1/embeddings') + .reply(200, { + object: 'list', + data: [{ + object: 'embedding', + index: 0, + embedding: [-0.0034387498, -0.026400521] + }], + model: 'text-embedding-ada-002-v2', + usage: { + prompt_tokens: 2, + total_tokens: 2 + } + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'embedding', + name: 'openai.createEmbedding', + inputDocuments: [ + { text: 'Hello, world!' } + ], + outputValue: '[1 embedding(s) returned with size 2]', + tokenMetrics: { input_tokens: 2, total_tokens: 2 }, + modelName: 'text-embedding-ada-002-v2', + modelProvider: 'openai', + metadata: { encoding_format: 'float' }, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.embeddings.create({ + model: 'text-embedding-ada-002-v2', + input: 'Hello, world!' 
+ }) + + await checkSpan + }) + + if (satisfiesTools(version)) { + it('submits a chat completion span with tools', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, { + id: 'chatcmpl-7GaWqyMTD9BLmkmy8SxyjUGX3KSRN', + object: 'chat.completion', + created: 1684188020, + model: 'gpt-3.5-turbo-0301', + usage: { + prompt_tokens: 37, + completion_tokens: 10, + total_tokens: 47 + }, + choices: [{ + message: { + role: 'assistant', + content: 'THOUGHT: I will use the "extract_fictional_info" tool', + tool_calls: [ + { + id: 'tool-1', + type: 'function', + function: { + name: 'extract_fictional_info', + arguments: '{"name":"SpongeBob","origin":"Bikini Bottom"}' + } + } + ] + }, + finish_reason: 'tool_calls', + index: 0 + }] + }, []) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + inputMessages: [{ role: 'user', content: 'What is SpongeBob SquarePants\'s origin?' }], + outputMessages: [{ + role: 'assistant', + content: 'THOUGHT: I will use the "extract_fictional_info" tool', + tool_calls: [ + { + name: 'extract_fictional_info', + arguments: { + name: 'SpongeBob', + origin: 'Bikini Bottom' + }, + tool_id: 'tool-1', + type: 'function' + } + ] + }], + metadata: { tool_choice: 'auto' }, + tags: { ml_app: 'test', language: 'javascript' }, + tokenMetrics: { input_tokens: 37, output_tokens: 10, total_tokens: 47 } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-0301', + messages: [{ role: 'user', content: 'What is SpongeBob SquarePants\'s origin?' 
}], + tools: [{ type: 'function', functiin: { /* this doesn't matter */} }], + tool_choice: 'auto' + }) + + await checkSpan + }) + } + + if (satisfiesStream(version)) { + it('submits a streamed completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/completions') + .reply(200, function () { + return fs.createReadStream(Path.join( + __dirname, baseOpenAITestsPath, 'streamed-responses/completions.simple.txt' + )) + }, { + 'Content-Type': 'text/plain', + 'openai-organization': 'kill-9' + }) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createCompletion', + inputMessages: [ + { content: 'Can you say this is a test?' } + ], + outputMessages: [ + { content: ' this is a test.' } + ], + tokenMetrics: { input_tokens: 8, output_tokens: 5, total_tokens: 13 }, + modelName: 'text-davinci-002', + modelProvider: 'openai', + metadata: { temperature: 0.5, stream: true }, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + const stream = await openai.completions.create({ + model: 'text-davinci-002', + prompt: 'Can you say this is a test?', + temperature: 0.5, + stream: true + }) + + for await (const part of stream) { + expect(part).to.have.property('choices') + expect(part.choices[0]).to.have.property('text') + } + + await checkSpan + }) + + it('submits a streamed chat completion span', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, function () { + return fs.createReadStream(Path.join( + __dirname, baseOpenAITestsPath, 'streamed-responses/chat.completions.simple.txt' + )) + }, { + 'Content-Type': 'text/plain', + 'openai-organization': 'kill-9' + }) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = 
LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + inputMessages: [ + { role: 'user', content: 'Hello' } + ], + outputMessages: [ + { role: 'assistant', content: 'Hello! How can I assist you today?' } + ], + tokenMetrics: { input_tokens: 1, output_tokens: 9, total_tokens: 10 }, + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + metadata: { stream: true }, + tags: { ml_app: 'test', language: 'javascript' } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + const stream = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-0301', + messages: [{ role: 'user', content: 'Hello' }], + stream: true + }) + + for await (const part of stream) { + expect(part).to.have.property('choices') + expect(part.choices[0]).to.have.property('delta') + } + + await checkSpan + }) + + if (satisfiesTools(version)) { + it('submits a chat completion span with tools stream', async () => { + nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(200, function () { + return fs.createReadStream(Path.join( + __dirname, baseOpenAITestsPath, 'streamed-responses/chat.completions.tool.and.content.txt' + )) + }, { + 'Content-Type': 'text/plain', + 'openai-organization': 'kill-9' + }) + + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + modelName: 'gpt-3.5-turbo-0301', + modelProvider: 'openai', + inputMessages: [{ role: 'user', content: 'What function would you call to finish this?' 
}], + outputMessages: [{ + role: 'assistant', + content: 'THOUGHT: Hi', + tool_calls: [ + { + name: 'finish', + arguments: { answer: '5' }, + type: 'function', + tool_id: 'call_Tg0o5wgoNSKF2iggAPmfWwem' + } + ] + }], + metadata: { tool_choice: 'auto', stream: true }, + tags: { ml_app: 'test', language: 'javascript' }, + tokenMetrics: { input_tokens: 9, output_tokens: 5, total_tokens: 14 } + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + const stream = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-0301', + messages: [{ role: 'user', content: 'What function would you call to finish this?' }], + tools: [{ type: 'function', function: { /* this doesn't matter */ } }], + tool_choice: 'auto', + stream: true + }) + + for await (const part of stream) { + expect(part).to.have.property('choices') + expect(part.choices[0]).to.have.property('delta') + } + + await checkSpan + }) + } + } + + it('submits a completion span with an error', async () => { + nock('https://api.openai.com:443') + .post('/v1/completions') + .reply(400, {}) + + let error + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createCompletion', + inputMessages: [{ content: 'Hello' }], + outputMessages: [{ content: '' }], + modelName: 'gpt-3.5-turbo', + modelProvider: 'openai', + metadata: { max_tokens: 50 }, + tags: { ml_app: 'test', language: 'javascript' }, + error, + errorType: error.type || error.name, + errorMessage: error.message, + errorStack: error.stack + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + try { + await openai.completions.create({ + model: 'gpt-3.5-turbo', + prompt: 'Hello', + max_tokens: 50 + }) + } catch (e) { + error = e + } + + await checkSpan + }) + + it('submits a chat completion span with an error', async () => { + 
nock('https://api.openai.com:443') + .post('/v1/chat/completions') + .reply(400, {}) + + let error + const checkSpan = agent.use(traces => { + const span = traces[0][0] + const spanEvent = LLMObsAgentProxySpanWriter.prototype.append.getCall(0).args[0] + + const expected = expectedLLMObsLLMSpanEvent({ + span, + spanKind: 'llm', + name: 'openai.createChatCompletion', + inputMessages: [{ role: 'user', content: 'Hello' }], + outputMessages: [{ content: '' }], + modelName: 'gpt-3.5-turbo', + modelProvider: 'openai', + metadata: { max_tokens: 50 }, + tags: { ml_app: 'test', language: 'javascript' }, + error, + errorType: error.type || error.name, + errorMessage: error.message, + errorStack: error.stack + }) + + expect(spanEvent).to.deepEqualWithMockValues(expected) + }) + + try { + await openai.chat.completions.create({ + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Hello' }], + max_tokens: 50 + }) + } catch (e) { + error = e + } + + await checkSpan + }) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/index.spec.js b/packages/dd-trace/test/llmobs/sdk/index.spec.js new file mode 100644 index 00000000000..90415f9bd0b --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/index.spec.js @@ -0,0 +1,1027 @@ +'use strict' + +const { expect } = require('chai') +const Config = require('../../../src/config') + +const LLMObsTagger = require('../../../src/llmobs/tagger') +const LLMObsEvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') +const LLMObsAgentProxySpanWriter = require('../../../src/llmobs/writers/spans/agentProxy') +const LLMObsSpanProcessor = require('../../../src/llmobs/span_processor') + +const tracerVersion = require('../../../../../package.json').version + +const { channel } = require('dc-polyfill') +const injectCh = channel('dd-trace:span:inject') + +describe('sdk', () => { + let LLMObsSDK + let llmobs + let tracer + + before(() => { + tracer = require('../../../../dd-trace') + tracer.init({ + service: 'service', + 
llmobs: { + mlApp: 'mlApp' + } + }) + llmobs = tracer.llmobs + + // spy on properties + sinon.spy(LLMObsSpanProcessor.prototype, 'process') + sinon.spy(LLMObsSpanProcessor.prototype, 'format') + sinon.spy(tracer._tracer._processor, 'process') + + // stub writer functionality + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'append') + sinon.stub(LLMObsEvalMetricsWriter.prototype, 'flush') + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'append') + sinon.stub(LLMObsAgentProxySpanWriter.prototype, 'flush') + + LLMObsSDK = require('../../../src/llmobs/sdk') + + // remove max listener warnings, we don't care about the writer anyways + process.removeAllListeners('beforeExit') + }) + + afterEach(() => { + LLMObsSpanProcessor.prototype.process.resetHistory() + LLMObsSpanProcessor.prototype.format.resetHistory() + tracer._tracer._processor.process.resetHistory() + + LLMObsEvalMetricsWriter.prototype.append.resetHistory() + LLMObsEvalMetricsWriter.prototype.flush.resetHistory() + + LLMObsAgentProxySpanWriter.prototype.append.resetHistory() + LLMObsAgentProxySpanWriter.prototype.flush.resetHistory() + + process.removeAllListeners('beforeExit') + }) + + after(() => { + sinon.restore() + llmobs.disable() + }) + + describe('enabled', () => { + for (const [value, label] of [ + [true, 'enabled'], + [false, 'disabled'] + ]) { + it(`returns ${value} when llmobs is ${label}`, () => { + const enabledOrDisabledLLMObs = new LLMObsSDK(null, { disable () {} }, { llmobs: { enabled: value } }) + + expect(enabledOrDisabledLLMObs.enabled).to.equal(value) + enabledOrDisabledLLMObs.disable() // unsubscribe + }) + } + }) + + describe('enable', () => { + it('enables llmobs if it is disabled', () => { + const config = new Config({}) + const llmobsModule = { + enable: sinon.stub(), + disable () {} + } + + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + + disabledLLMObs.enable({ + mlApp: 'mlApp' + }) + + 
expect(disabledLLMObs.enabled).to.be.true + expect(disabledLLMObs._config.llmobs.mlApp).to.equal('mlApp') + expect(disabledLLMObs._config.llmobs.agentlessEnabled).to.be.false + + expect(llmobsModule.enable).to.have.been.called + + disabledLLMObs.disable() // unsubscribe + }) + + it('does not enable llmobs if it is already enabled', () => { + sinon.spy(llmobs._llmobsModule, 'enable') + llmobs.enable({}) + + expect(llmobs.enabled).to.be.true + expect(llmobs._llmobsModule.enable).to.not.have.been.called + llmobs._llmobsModule.enable.restore() + }) + + it('does not enable llmobs if env var conflicts', () => { + const config = new Config({}) + const llmobsModule = { + enable: sinon.stub() + } + + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + process.env.DD_LLMOBS_ENABLED = 'false' + + disabledLLMObs.enable({}) + + expect(disabledLLMObs.enabled).to.be.false + delete process.env.DD_LLMOBS_ENABLED + disabledLLMObs.disable() // unsubscribe + }) + }) + + describe('disable', () => { + it('disables llmobs if it is enabled', () => { + const llmobsModule = { + disable: sinon.stub() + } + + const config = new Config({ + llmobs: {} + }) + + const enabledLLMObs = new LLMObsSDK(tracer._tracer, llmobsModule, config) + + expect(enabledLLMObs.enabled).to.be.true + enabledLLMObs.disable() + + expect(enabledLLMObs.enabled).to.be.false + expect(llmobsModule.disable).to.have.been.called + }) + + it('does not disable llmobs if it is already disabled', () => { + // do not fully enable a disabled llmobs + const disabledLLMObs = new LLMObsSDK(null, { disable () {} }, { llmobs: { enabled: false } }) + sinon.spy(disabledLLMObs._llmobsModule, 'disable') + + disabledLLMObs.disable() + + expect(disabledLLMObs.enabled).to.be.false + expect(disabledLLMObs._llmobsModule.disable).to.not.have.been.called + }) + }) + + describe('tracing', () => { + describe('trace', () => { + describe('tracing behavior', () => { + it('starts a span 
if llmobs is disabled but does not process it in the LLMObs span processor', () => { + tracer._tracer._config.llmobs.enabled = false + + llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, (span, cb) => { + expect(LLMObsTagger.tagMap.get(span)).to.not.exist + expect(() => span.setTag('k', 'v')).to.not.throw() + expect(() => cb()).to.not.throw() + }) + + expect(llmobs._tracer._processor.process).to.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if the kind is invalid', () => { + expect(() => llmobs.trace({ kind: 'invalid' }, () => {})).to.throw() + + expect(llmobs._tracer._processor.process).to.not.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + }) + + // TODO: need span kind optional for this + it.skip('throws if no name is provided', () => { + expect(() => llmobs.trace({ kind: 'workflow' }, () => {})).to.throw() + + expect(llmobs._tracer._processor.process).to.not.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + }) + + it('traces a block', () => { + let span + + llmobs.trace({ kind: 'workflow' }, _span => { + span = _span + sinon.spy(span, 'finish') + }) + + expect(span.finish).to.have.been.called + }) + + it('traces a block with a callback', () => { + let span + let done + + llmobs.trace({ kind: 'workflow' }, (_span, _done) => { + span = _span + sinon.spy(span, 'finish') + done = _done + }) + + expect(span.finish).to.not.have.been.called + + done() + + expect(span.finish).to.have.been.called + }) + + it('traces a promise', done => { + const deferred = {} + const promise = new Promise(resolve => { + deferred.resolve = resolve + }) + + let span + + llmobs + .trace({ kind: 'workflow' }, _span => { + span = _span + sinon.spy(span, 'finish') + return promise + }) + .then(() => { + expect(span.finish).to.have.been.called + done() + }) + .catch(done) + + 
expect(span.finish).to.not.have.been.called + + deferred.resolve() + }) + }) + + describe('parentage', () => { + // TODO: need to implement custom trace IDs + it.skip('starts a span with a distinct trace id', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + expect(LLMObsTagger.tagMap.get(span)['_ml_obs.trace_id']) + .to.exist.and.to.not.equal(span.context().toTraceId(true)) + }) + }) + + it('sets span parentage correctly', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, outerLLMSpan => { + llmobs.trace({ kind: 'task', name: 'test' }, innerLLMSpan => { + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMSpan.context()._tags['_ml_obs.trace_id']) + }) + }) + }) + + it('maintains llmobs parentage separately from apm spans', () => { + llmobs.trace({ kind: 'workflow', name: 'outer-llm' }, outerLLMSpan => { + expect(llmobs._active()).to.equal(outerLLMSpan) + tracer.trace('apmSpan', apmSpan => { + expect(llmobs._active()).to.equal(outerLLMSpan) + llmobs.trace({ kind: 'workflow', name: 'inner-llm' }, innerLLMSpan => { + expect(llmobs._active()).to.equal(innerLLMSpan) + + // llmobs span linkage + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + + // apm span linkage + expect(innerLLMSpan.context()._parentId.toString(10)).to.equal(apmSpan.context().toSpanId()) + expect(apmSpan.context()._parentId.toString(10)).to.equal(outerLLMSpan.context().toSpanId()) + }) + }) + }) + }) + + // TODO: need to implement custom trace IDs + it.skip('starts different traces for llmobs spans as child spans of an apm root span', () => { + let apmTraceId, traceId1, traceId2 + tracer.trace('apmRootSpan', apmRootSpan => { + apmTraceId = apmRootSpan.context().toTraceId(true) + 
llmobs.trace('workflow', llmobsSpan1 => { + traceId1 = llmobsSpan1.context()._tags['_ml_obs.trace_id'] + }) + + llmobs.trace('workflow', llmobsSpan2 => { + traceId2 = llmobsSpan2.context()._tags['_ml_obs.trace_id'] + }) + }) + + expect(traceId1).to.not.equal(traceId2) + expect(traceId1).to.not.equal(apmTraceId) + expect(traceId2).to.not.equal(apmTraceId) + }) + + it('maintains the llmobs parentage when error callbacks are used', () => { + llmobs.trace({ kind: 'workflow' }, outer => { + llmobs.trace({ kind: 'task' }, (inner, cb) => { + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outer.context().toSpanId()) + cb() // finish the span + }) + + expect(llmobs._active()).to.equal(outer) + + llmobs.trace({ kind: 'task' }, (inner) => { + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outer.context().toSpanId()) + }) + }) + }) + }) + }) + + describe('wrap', () => { + describe('tracing behavior', () => { + it('starts a span if llmobs is disabled but does not process it in the LLMObs span processor', () => { + tracer._tracer._config.llmobs.enabled = false + + const fn = llmobs.wrap({ kind: 'workflow' }, (a) => { + expect(a).to.equal(1) + expect(LLMObsTagger.tagMap.get(llmobs._active())).to.not.exist + }) + + expect(() => fn(1)).to.not.throw() + + expect(llmobs._tracer._processor.process).to.have.been.called + expect(LLMObsSpanProcessor.prototype.format).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if the kind is invalid', () => { + expect(() => llmobs.wrap({ kind: 'invalid' }, () => {})).to.throw() + }) + + it('wraps a function', () => { + let span + const fn = llmobs.wrap({ kind: 'workflow' }, () => { + span = tracer.scope().active() + sinon.spy(span, 'finish') + }) + + fn() + + expect(span.finish).to.have.been.called + }) + + it('wraps a function with a callback', () => { + let span 
+ let next + + const fn = llmobs.wrap({ kind: 'workflow' }, (_next) => { + span = tracer.scope().active() + sinon.spy(span, 'finish') + next = _next + }) + + fn(() => {}) + + expect(span.finish).to.not.have.been.called + + next() + + expect(span.finish).to.have.been.called + }) + + it('does not auto-annotate llm spans', () => { + let span + function myLLM (input) { + span = llmobs._active() + return '' + } + + const wrappedMyLLM = llmobs.wrap({ kind: 'llm' }, myLLM) + + wrappedMyLLM('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('does not auto-annotate embedding spans input', () => { + let span + function myEmbedding (input) { + span = llmobs._active() + return 'output' + } + + const wrappedMyEmbedding = llmobs.wrap({ kind: 'embedding' }, myEmbedding) + + wrappedMyEmbedding('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.output.value': 'output' + }) + }) + + it('does not auto-annotate retrieval spans output', () => { + let span + function myRetrieval (input) { + span = llmobs._active() + return 'output' + } + + const wrappedMyRetrieval = llmobs.wrap({ kind: 'retrieval' }, myRetrieval) + + wrappedMyRetrieval('input') + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.value': 'input' + }) + }) + + // TODO: need span kind optional for this test + it.skip('sets the span name to "unnamed-anonymous-function" if no name is provided', () => { + let span + const fn = llmobs.wrap({ kind: 'workflow' }, () => { + span = llmobs._active() + }) + + fn() + + expect(span.context()._name).to.equal('unnamed-anonymous-function') + }) + }) + + 
describe('parentage', () => { + // TODO: need to implement custom trace IDs + it.skip('starts a span with a distinct trace id', () => { + const fn = llmobs.wrap('workflow', { name: 'test' }, () => { + const span = llmobs._active() + expect(span.context()._tags['_ml_obs.trace_id']) + .to.exist.and.to.not.equal(span.context().toTraceId(true)) + }) + + fn() + }) + + it('sets span parentage correctly', () => { + let outerLLMSpan, innerLLMSpan + + function outer () { + outerLLMSpan = llmobs._active() + innerWrapped() + } + + function inner () { + innerLLMSpan = llmobs._active() + expect(LLMObsTagger.tagMap.get(innerLLMSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMSpan.context()._tags['_ml_obs.trace_id']) + } + + const outerWrapped = llmobs.wrap({ kind: 'workflow' }, outer) + const innerWrapped = llmobs.wrap({ kind: 'task' }, inner) + + outerWrapped() + }) + + it('maintains llmobs parentage separately from apm spans', () => { + let outerLLMObsSpan, innerLLMObsSpan + + function outerLLMObs () { + outerLLMObsSpan = llmobs._active() + expect(outerLLMObsSpan).to.equal(tracer.scope().active()) + + apmWrapped() + } + function apm () { + expect(llmobs._active()).to.equal(outerLLMObsSpan) + innerWrapped() + } + function innerLLMObs () { + innerLLMObsSpan = llmobs._active() + expect(innerLLMObsSpan).to.equal(tracer.scope().active()) + expect(LLMObsTagger.tagMap.get(innerLLMObsSpan)['_ml_obs.llmobs_parent_id']) + .to.equal(outerLLMObsSpan.context().toSpanId()) + // TODO: need to implement custom trace IDs + // expect(innerLLMObsSpan.context()._tags['_ml_obs.trace_id']) + // .to.equal(outerLLMObsSpan.context()._tags['_ml_obs.trace_id']) + } + + const outerWrapped = llmobs.wrap({ kind: 'workflow' }, outerLLMObs) + const apmWrapped = tracer.wrap('workflow', apm) + const innerWrapped = llmobs.wrap({ kind: 'workflow' }, 
innerLLMObs) + + outerWrapped() + }) + + // TODO: need to implement custom trace IDs + it.skip('starts different traces for llmobs spans as child spans of an apm root span', () => { + let traceId1, traceId2, apmTraceId + function apm () { + apmTraceId = tracer.scope().active().context().toTraceId(true) + llmObsWrapped1() + llmObsWrapped2() + } + function llmObs1 () { + traceId1 = LLMObsTagger.tagMap.get(llmobs._active())['_ml_obs.trace_id'] + } + function llmObs2 () { + traceId2 = LLMObsTagger.tagMap.get(llmobs._active())['_ml_obs.trace_id'] + } + + const apmWrapped = tracer.wrap('workflow', apm) + const llmObsWrapped1 = llmobs.wrap({ kind: 'workflow' }, llmObs1) + const llmObsWrapped2 = llmobs.wrap({ kind: 'workflow' }, llmObs2) + + apmWrapped() + + expect(traceId1).to.not.equal(traceId2) + expect(traceId1).to.not.equal(apmTraceId) + expect(traceId2).to.not.equal(apmTraceId) + }) + + it('maintains the llmobs parentage when callbacks are used', () => { + let outerSpan + function outer () { + outerSpan = llmobs._active() + wrappedInner1(() => {}) + expect(outerSpan).to.equal(tracer.scope().active()) + wrappedInner2() + } + + function inner1 (cb) { + const inner = tracer.scope().active() + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outerSpan.context().toSpanId()) + cb() + } + + function inner2 () { + const inner = tracer.scope().active() + expect(llmobs._active()).to.equal(inner) + expect(LLMObsTagger.tagMap.get(inner)['_ml_obs.llmobs_parent_id']).to.equal(outerSpan.context().toSpanId()) + } + + const wrappedOuter = llmobs.wrap({ kind: 'workflow' }, outer) + const wrappedInner1 = llmobs.wrap({ kind: 'task' }, inner1) + const wrappedInner2 = llmobs.wrap({ kind: 'task' }, inner2) + + wrappedOuter() + }) + }) + }) + }) + + describe('annotate', () => { + it('returns if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + sinon.spy(llmobs, '_active') + llmobs.annotate() + 
+ expect(llmobs._active).to.not.have.been.called + llmobs._active.restore() + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws if no arguments are provided', () => { + expect(() => llmobs.annotate()).to.throw() + }) + + it('throws if there are no options given', () => { + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + expect(() => llmobs.annotate(span)).to.throw() + + // span should still exist in the registry, just with no annotations + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + }) + + it('throws if the provided span is not an LLMObs span', () => { + tracer.trace('test', span => { + expect(() => llmobs.annotate(span, {})).to.throw() + + // no span in registry, should not throw + expect(LLMObsTagger.tagMap.get(span)).to.not.exist + }) + }) + + it('throws if the span is finished', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + llmobs.trace({ kind: 'workflow', name: 'outer' }, () => { + let innerLLMSpan + llmobs.trace({ kind: 'task', name: 'inner' }, _span => { + innerLLMSpan = _span + }) + + expect(() => llmobs.annotate(innerLLMSpan, {})).to.throw() + expect(llmobs._tagger.tagTextIO).to.not.have.been.called + }) + llmobs._tagger.tagTextIO.restore() + }) + + it('throws for an llmobs span with an invalid kind', () => { + // TODO this might end up being obsolete with llmobs span kind as optional + sinon.spy(llmobs._tagger, 'tagLLMIO') + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + LLMObsTagger.tagMap.get(span)['_ml_obs.meta.span.kind'] = undefined // somehow this is set + expect(() => llmobs.annotate(span, {})).to.throw() + }) + + expect(llmobs._tagger.tagLLMIO).to.not.have.been.called + llmobs._tagger.tagLLMIO.restore() + }) + + it('annotates the current active llmobs span in an llmobs scope', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + + llmobs.trace({ kind: 'workflow', name: 'test' }, 
span => { + const inputData = {} + llmobs.annotate({ inputData }) + + expect(llmobs._tagger.tagTextIO).to.have.been.calledWith(span, inputData, undefined) + }) + + llmobs._tagger.tagTextIO.restore() + }) + + it('annotates the current active llmobs span in an apm scope', () => { + sinon.spy(llmobs._tagger, 'tagTextIO') + + llmobs.trace({ kind: 'workflow', name: 'test' }, llmobsSpan => { + tracer.trace('apmSpan', () => { + const inputData = {} + llmobs.annotate({ inputData }) + + expect(llmobs._tagger.tagTextIO).to.have.been.calledWith(llmobsSpan, inputData, undefined) + }) + }) + + llmobs._tagger.tagTextIO.restore() + }) + + it('annotates llm io for an llm span', () => { + const inputData = [{ role: 'system', content: 'system prompt' }] + const outputData = [{ role: 'ai', content: 'no question was asked' }] + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.messages': inputData, + '_ml_obs.meta.output.messages': outputData + }) + }) + }) + + it('annotates embedding io for an embedding span', () => { + const inputData = [{ text: 'input text' }] + const outputData = 'documents embedded' + + llmobs.trace({ kind: 'embedding', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.documents': inputData, + '_ml_obs.meta.output.value': outputData + }) + }) + }) + + it('annotates retrieval io for a retrieval span', () => { + const inputData = 'input text' + const outputData = [{ text: 'output text' }] + + llmobs.trace({ kind: 'retrieval', name: 'test' }, span => { + llmobs.annotate({ inputData, outputData }) + + 
expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.input.value': inputData, + '_ml_obs.meta.output.documents': outputData + }) + }) + }) + + it('annotates metadata if present', () => { + const metadata = { response_type: 'json' } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ metadata }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.meta.metadata': metadata + }) + }) + }) + + it('annotates metrics if present', () => { + const metrics = { score: 0.6 } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ metrics }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.metrics': metrics + }) + }) + }) + + it('annotates tags if present', () => { + const tags = { 'custom.tag': 'value' } + + llmobs.trace({ kind: 'llm', name: 'test' }, span => { + llmobs.annotate({ tags }) + + expect(LLMObsTagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'mlApp', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.tags': tags + }) + }) + }) + }) + + describe('exportSpan', () => { + it('throws if no span is provided', () => { + expect(() => llmobs.exportSpan()).to.throw() + }) + + it('throws if the provided span is not an LLMObs span', () => { + tracer.trace('test', span => { + expect(() => llmobs.exportSpan(span)).to.throw() + }) + }) + + it('uses the provided span', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + const spanCtx = llmobs.exportSpan(span) + + const traceId = span.context().toTraceId(true) + const spanId = span.context().toSpanId() + + 
expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + + it('uses the active span in an llmobs scope', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, span => { + const spanCtx = llmobs.exportSpan() + + const traceId = span.context().toTraceId(true) + const spanId = span.context().toSpanId() + + expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + + it('uses the active span in an apm scope', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, llmobsSpan => { + tracer.trace('apmSpan', () => { + const spanCtx = llmobs.exportSpan() + + const traceId = llmobsSpan.context().toTraceId(true) + const spanId = llmobsSpan.context().toSpanId() + + expect(spanCtx).to.deep.equal({ traceId, spanId }) + }) + }) + }) + + it('returns undefined if the provided span is not a span', () => { + llmobs.trace({ kind: 'workflow', name: 'test' }, fakeSpan => { + fakeSpan.context().toTraceId = undefined // something that would throw + LLMObsTagger.tagMap.set(fakeSpan, {}) + const spanCtx = llmobs.exportSpan(fakeSpan) + + expect(spanCtx).to.be.undefined + }) + }) + }) + + describe('submitEvaluation', () => { + let spanCtx + let originalApiKey + + before(() => { + originalApiKey = tracer._tracer._config.apiKey + tracer._tracer._config.apiKey = 'test' + }) + + beforeEach(() => { + spanCtx = { + traceId: '1234', + spanId: '5678' + } + }) + + after(() => { + tracer._tracer._config.apiKey = originalApiKey + }) + + it('does not submit an evaluation if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + llmobs.submitEvaluation() + + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + tracer._tracer._config.llmobs.enabled = true + }) + + it('throws for a missing API key', () => { + const apiKey = tracer._tracer._config.apiKey + delete tracer._tracer._config.apiKey + + expect(() => llmobs.submitEvaluation(spanCtx)).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + 
tracer._tracer._config.apiKey = apiKey + }) + + it('throws for an invalid span context', () => { + const invalid = {} + + expect(() => llmobs.submitEvaluation(invalid, {})).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a missing mlApp', () => { + const mlApp = tracer._tracer._config.llmobs.mlApp + delete tracer._tracer._config.llmobs.mlApp + + expect(() => llmobs.submitEvaluation(spanCtx)).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + + tracer._tracer._config.llmobs.mlApp = mlApp + }) + + it('throws for an invalid timestamp', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 'invalid' + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a missing label', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234 + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for an invalid metric type', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'invalid' + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a mismatched value for a categorical metric', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'categorical', + value: 1 + }) + }).to.throw() + expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('throws for a mismatched value for a score metric', () => { + expect(() => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'score', + value: 'string' + }) + }).to.throw() + + 
expect(LLMObsEvalMetricsWriter.prototype.append).to.not.have.been.called + }) + + it('submits an evaluation metric', () => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'score', + value: 0.6, + tags: { + host: 'localhost' + } + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.deep.equal({ + trace_id: spanCtx.traceId, + span_id: spanCtx.spanId, + ml_app: 'test', + timestamp_ms: 1234, + label: 'test', + metric_type: 'score', + score_value: 0.6, + tags: [`dd-trace.version:${tracerVersion}`, 'ml_app:test', 'host:localhost'] + }) + }) + + it('sets `categorical_value` for categorical metrics', () => { + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + timestampMs: 1234, + label: 'test', + metricType: 'categorical', + value: 'foo', + tags: { + host: 'localhost' + } + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.have.property('categorical_value', 'foo') + }) + + it('defaults to the current time if no timestamp is provided', () => { + sinon.stub(Date, 'now').returns(1234) + llmobs.submitEvaluation(spanCtx, { + mlApp: 'test', + label: 'test', + metricType: 'score', + value: 0.6 + }) + + expect(LLMObsEvalMetricsWriter.prototype.append.getCall(0).args[0]).to.have.property('timestamp_ms', 1234) + Date.now.restore() + }) + }) + + describe('flush', () => { + it('does not flush if llmobs is disabled', () => { + tracer._tracer._config.llmobs.enabled = false + llmobs.flush() + + expect(LLMObsEvalMetricsWriter.prototype.flush).to.not.have.been.called + expect(LLMObsAgentProxySpanWriter.prototype.flush).to.not.have.been.called + tracer._tracer._config.llmobs.enabled = true + }) + + it('flushes the evaluation writer and span writer', () => { + llmobs.flush() + + expect(LLMObsEvalMetricsWriter.prototype.flush).to.have.been.called + expect(LLMObsAgentProxySpanWriter.prototype.flush).to.have.been.called + }) + + it('logs if there was an error flushing', () => { 
+ LLMObsEvalMetricsWriter.prototype.flush.throws(new Error('boom')) + + expect(() => llmobs.flush()).to.not.throw() + }) + }) + + describe('distributed', () => { + it('adds the current llmobs span id to the injection context', () => { + const carrier = { 'x-datadog-tags': '' } + let parentId + llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, span => { + parentId = span.context().toSpanId() + + // simulate injection from http integration or from tracer + // something that triggers the text_map injection + injectCh.publish({ carrier }) + }) + + expect(carrier['x-datadog-tags']).to.equal(`,_dd.p.llmobs_parent_id=${parentId}`) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/integration.spec.js b/packages/dd-trace/test/llmobs/sdk/integration.spec.js new file mode 100644 index 00000000000..acba94d8f71 --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/integration.spec.js @@ -0,0 +1,256 @@ +'use strict' + +const { expectedLLMObsNonLLMSpanEvent, deepEqualWithMockValues } = require('../util') +const chai = require('chai') + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +const tags = { + ml_app: 'test', + language: 'javascript' +} + +const AgentProxyWriter = require('../../../src/llmobs/writers/spans/agentProxy') +const EvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') + +const tracerVersion = require('../../../../../package.json').version + +describe('end to end sdk integration tests', () => { + let tracer + let llmobs + let payloadGenerator + + function run (payloadGenerator) { + payloadGenerator() + return { + spans: tracer._tracer._processor.process.args.map(args => args[0]).reverse(), // spans finish in reverse order + llmobsSpans: AgentProxyWriter.prototype.append.args?.map(args => args[0]), + evaluationMetrics: EvalMetricsWriter.prototype.append.args?.map(args => args[0]) + } + } + + function check (expected, actual) { + for (const expectedLLMObsSpanIdx in expected) { + const 
expectedLLMObsSpan = expected[expectedLLMObsSpanIdx] + const actualLLMObsSpan = actual[expectedLLMObsSpanIdx] + expect(actualLLMObsSpan).to.deep.deepEqualWithMockValues(expectedLLMObsSpan) + } + } + + before(() => { + tracer = require('../../../../dd-trace') + tracer.init({ + llmobs: { + mlApp: 'test' + } + }) + + // another test suite may have disabled LLMObs + // to clear the intervals and unsubscribe + // in that case, the `init` call above won't have re-enabled it + // we'll re-enable it here + llmobs = tracer.llmobs + if (!llmobs.enabled) { + llmobs.enable({ + mlApp: 'test' + }) + } + + tracer._tracer._config.apiKey = 'test' + + sinon.spy(tracer._tracer._processor, 'process') + sinon.stub(AgentProxyWriter.prototype, 'append') + sinon.stub(EvalMetricsWriter.prototype, 'append') + }) + + afterEach(() => { + tracer._tracer._processor.process.resetHistory() + AgentProxyWriter.prototype.append.resetHistory() + EvalMetricsWriter.prototype.append.resetHistory() + + process.removeAllListeners('beforeExit') + + llmobs.disable() + llmobs.enable({ mlApp: 'test', apiKey: 'test' }) + }) + + after(() => { + sinon.restore() + llmobs.disable() + delete global._ddtrace + delete require.cache[require.resolve('../../../../dd-trace')] + }) + + it('uses trace correctly', () => { + payloadGenerator = function () { + const result = llmobs.trace({ kind: 'agent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world', metadata: { foo: 'bar' } }) + return tracer.trace('apmSpan', () => { + llmobs.annotate({ tags: { bar: 'baz' } }) // should use the current active llmobs span + return llmobs.trace({ kind: 'workflow', name: 'myWorkflow' }, () => { + llmobs.annotate({ inputData: 'world', outputData: 'hello' }) + return 'boom' + }) + }) + }) + + expect(result).to.equal('boom') + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(3) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + 
span: spans[0], + spanKind: 'agent', + tags: { ...tags, bar: 'baz' }, + metadata: { foo: 'bar' }, + inputValue: 'hello', + outputValue: 'world' + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[2], + spanKind: 'workflow', + parentId: spans[0].context().toSpanId(), + tags, + name: 'myWorkflow', + inputValue: 'world', + outputValue: 'hello' + }) + ] + + check(expected, llmobsSpans) + }) + + it('uses wrap correctly', () => { + payloadGenerator = function () { + function agent (input) { + llmobs.annotate({ inputData: 'hello' }) + return apm(input) + } + // eslint-disable-next-line no-func-assign + agent = llmobs.wrap({ kind: 'agent' }, agent) + + function apm (input) { + llmobs.annotate({ metadata: { foo: 'bar' } }) // should annotate the agent span + return workflow(input) + } + // eslint-disable-next-line no-func-assign + apm = tracer.wrap('apm', apm) + + function workflow () { + llmobs.annotate({ outputData: 'custom' }) + return 'world' + } + // eslint-disable-next-line no-func-assign + workflow = llmobs.wrap({ kind: 'workflow', name: 'myWorkflow' }, workflow) + + agent('my custom input') + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(3) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: spans[0], + spanKind: 'agent', + tags, + inputValue: 'hello', + outputValue: 'world', + metadata: { foo: 'bar' } + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[2], + spanKind: 'workflow', + parentId: spans[0].context().toSpanId(), + tags, + name: 'myWorkflow', + inputValue: 'my custom input', + outputValue: 'custom' + }) + ] + + check(expected, llmobsSpans) + }) + + it('instruments and uninstruments as needed', () => { + payloadGenerator = function () { + llmobs.disable() + llmobs.trace({ kind: 'agent', name: 'llmobsParent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world' }) + llmobs.enable({ mlApp: 'test1' }) + llmobs.trace({ kind: 'workflow', 
name: 'child1' }, () => { + llmobs.disable() + llmobs.trace({ kind: 'workflow', name: 'child2' }, () => { + llmobs.enable({ mlApp: 'test2' }) + llmobs.trace({ kind: 'workflow', name: 'child3' }, () => {}) + }) + }) + }) + } + + const { spans, llmobsSpans } = run(payloadGenerator) + expect(spans).to.have.lengthOf(4) + expect(llmobsSpans).to.have.lengthOf(2) + + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: spans[1], + spanKind: 'workflow', + tags: { ...tags, ml_app: 'test1' }, + name: 'child1' + }), + expectedLLMObsNonLLMSpanEvent({ + span: spans[3], + spanKind: 'workflow', + tags: { ...tags, ml_app: 'test2' }, + name: 'child3', + parentId: spans[1].context().toSpanId() + }) + ] + + check(expected, llmobsSpans) + }) + + it('submits evaluations', () => { + sinon.stub(Date, 'now').returns(1234567890) + payloadGenerator = function () { + llmobs.trace({ kind: 'agent', name: 'myAgent' }, () => { + llmobs.annotate({ inputData: 'hello', outputData: 'world' }) + const spanCtx = llmobs.exportSpan() + llmobs.submitEvaluation(spanCtx, { + label: 'foo', + metricType: 'categorical', + value: 'bar' + }) + }) + } + + const { spans, llmobsSpans, evaluationMetrics } = run(payloadGenerator) + expect(spans).to.have.lengthOf(1) + expect(llmobsSpans).to.have.lengthOf(1) + expect(evaluationMetrics).to.have.lengthOf(1) + + // check eval metrics content + const exptected = [ + { + trace_id: spans[0].context().toTraceId(true), + span_id: spans[0].context().toSpanId(), + label: 'foo', + metric_type: 'categorical', + categorical_value: 'bar', + ml_app: 'test', + timestamp_ms: 1234567890, + tags: [`dd-trace.version:${tracerVersion}`, 'ml_app:test'] + } + ] + + check(exptected, evaluationMetrics) + + Date.now.restore() + }) +}) diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js b/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js new file mode 100644 index 00000000000..b792a4fbdb7 --- /dev/null +++ 
b/packages/dd-trace/test/llmobs/sdk/typescript/index.spec.js @@ -0,0 +1,133 @@ +'use strict' + +const { execSync } = require('child_process') +const { + FakeAgent, + createSandbox, + spawnProc +} = require('../../../../../../integration-tests/helpers') +const chai = require('chai') +const path = require('path') +const { expectedLLMObsNonLLMSpanEvent, deepEqualWithMockValues } = require('../../util') + +chai.Assertion.addMethod('deepEqualWithMockValues', deepEqualWithMockValues) + +function check (expected, actual) { + for (const expectedLLMObsSpanIdx in expected) { + const expectedLLMObsSpan = expected[expectedLLMObsSpanIdx] + const actualLLMObsSpan = actual[expectedLLMObsSpanIdx] + expect(actualLLMObsSpan).to.deep.deepEqualWithMockValues(expectedLLMObsSpan) + } +} + +const testVersions = [ + '^1', + '^2', + '^3', + '^4', + '^5' +] + +const testCases = [ + { + name: 'not initialized', + file: 'noop' + }, + { + name: 'instruments an application with decorators', + file: 'index', + setup: (agent, results = {}) => { + const llmobsRes = agent.assertLlmObsPayloadReceived(({ payload }) => { + results.llmobsSpans = payload.spans + }) + + const apmRes = agent.assertMessageReceived(({ payload }) => { + results.apmSpans = payload + }) + + return [llmobsRes, apmRes] + }, + runTest: ({ llmobsSpans, apmSpans }) => { + const actual = llmobsSpans + const expected = [ + expectedLLMObsNonLLMSpanEvent({ + span: apmSpans[0][0], + spanKind: 'agent', + tags: { + ml_app: 'test', + language: 'javascript' + }, + inputValue: 'this is a', + outputValue: 'test' + }) + ] + + check(expected, actual) + } + } +] + +// a bit of devex to show the version we're actually testing +// so we don't need to know ahead of time +function getLatestVersion (range) { + const command = `npm show typescript@${range} version` + const output = execSync(command, { encoding: 'utf-8' }).trim() + const versions = output.split('\n').map(line => line.split(' ')[1].replace(/'/g, '')) + return versions[versions.length - 
1] +} + +describe('typescript', () => { + let agent + let proc + let sandbox + + for (const version of testVersions) { + context(`with version ${getLatestVersion(version)}`, () => { + before(async function () { + this.timeout(20000) + sandbox = await createSandbox( + [`typescript@${version}`], false, ['./packages/dd-trace/test/llmobs/sdk/typescript/*'] + ) + }) + + after(async () => { + await sandbox.remove() + }) + + beforeEach(async () => { + agent = await new FakeAgent().start() + }) + + afterEach(async () => { + proc && proc.kill() + await agent.stop() + }) + + for (const test of testCases) { + const { name, file } = test + it(name, async () => { + const cwd = sandbox.folder + + const results = {} + const waiters = test.setup ? test.setup(agent, results) : [] + + // compile typescript + execSync( + `tsc --target ES6 --experimentalDecorators --module commonjs --sourceMap ${file}.ts`, + { cwd, stdio: 'inherit' } + ) + + proc = await spawnProc( + path.join(cwd, `${file}.js`), + { cwd, env: { DD_TRACE_AGENT_PORT: agent.port } } + ) + + await Promise.all(waiters) + + // some tests just need the file to run, not assert payloads + test.runTest && test.runTest(results) + }) + } + }) + } +}) diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/index.ts b/packages/dd-trace/test/llmobs/sdk/typescript/index.ts new file mode 100644 index 00000000000..9aa320fd92c --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/typescript/index.ts @@ -0,0 +1,23 @@ +// @ts-ignore +import tracer from 'dd-trace'; + +const llmobs = tracer.init({ + llmobs: { + mlApp: 'test', + } +}).llmobs; + +class Test { + @llmobs.decorate({ kind: 'agent' }) + runChain (input: string) { + llmobs.annotate({ + inputData: 'this is a', + outputData: 'test' + }) + + return 'world' + } +} + +const test: Test = new Test(); +test.runChain('hello'); diff --git a/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts b/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts new file mode 100644 index 
00000000000..e1b7c00837b --- /dev/null +++ b/packages/dd-trace/test/llmobs/sdk/typescript/noop.ts @@ -0,0 +1,19 @@ +// @ts-ignore +import tracer from 'dd-trace'; +import * as assert from 'assert'; +const llmobs = tracer.llmobs; + +class Test { + @llmobs.decorate({ kind: 'agent' }) + runChain (input: string) { + llmobs.annotate({ + inputData: 'this is a', + outputData: 'test' + }) + + return 'world' + } +} + +const test: Test = new Test(); +assert.equal(test.runChain('hello'), 'world') \ No newline at end of file diff --git a/packages/dd-trace/test/llmobs/span_processor.spec.js b/packages/dd-trace/test/llmobs/span_processor.spec.js new file mode 100644 index 00000000000..ae73c4a9677 --- /dev/null +++ b/packages/dd-trace/test/llmobs/span_processor.spec.js @@ -0,0 +1,361 @@ +'use strict' + +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +// we will use this to populate the span-tags map +const LLMObsTagger = require('../../src/llmobs/tagger') + +describe('span processor', () => { + let LLMObsSpanProcessor + let processor + let writer + let log + + beforeEach(() => { + writer = { + append: sinon.stub() + } + + log = { + warn: sinon.stub() + } + + LLMObsSpanProcessor = proxyquire('../../src/llmobs/span_processor', { + '../../../../package.json': { version: 'x.y.z' }, + '../log': log + }) + + processor = new LLMObsSpanProcessor({ llmobs: { enabled: true } }) + processor.setWriter(writer) + }) + + describe('process', () => { + let span + + it('should do nothing if llmobs is not enabled', () => { + processor = new LLMObsSpanProcessor({ llmobs: { enabled: false } }) + + expect(() => processor.process({ span })).not.to.throw() + }) + + it('should do nothing if the span is not an llm obs span', () => { + span = { context: () => ({ _tags: {} }) } + processor.process({ span }) + + expect(processor._writer.append).to.not.have.been.called + }) + + it('should format the span event for the writer', () => { + span = { + _name: 'test', + _startTime: 0, // this is in ms, will be 
converted to ns + _duration: 1, // this is in ms, will be converted to ns + context () { + return { + _tags: {}, + toTraceId () { return '123' }, // should not use this + toSpanId () { return '456' } + } + } + } + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'myModel', + '_ml_obs.meta.model_provider': 'myProvider', + '_ml_obs.meta.metadata': { foo: 'bar' }, + '_ml_obs.meta.ml_app': 'myApp', + '_ml_obs.meta.input.value': 'input-value', + '_ml_obs.meta.output.value': 'output-value', + '_ml_obs.meta.input.messages': [{ role: 'user', content: 'hello' }], + '_ml_obs.meta.output.messages': [{ role: 'assistant', content: 'world' }], + '_ml_obs.llmobs_parent_id': '1234' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload).to.deep.equal({ + trace_id: '123', + span_id: '456', + parent_id: '1234', + name: 'test', + tags: [ + 'version:', + 'env:', + 'service:', + 'source:integration', + 'ml_app:myApp', + 'dd-trace.version:x.y.z', + 'error:0', + 'language:javascript' + ], + start_ns: 0, + duration: 1000000, + status: 'ok', + meta: { + 'span.kind': 'llm', + model_name: 'myModel', + model_provider: 'myprovider', // should be lowercase + input: { + value: 'input-value', + messages: [{ role: 'user', content: 'hello' }] + }, + output: { + value: 'output-value', + messages: [{ role: 'assistant', content: 'world' }] + }, + metadata: { foo: 'bar' } + }, + metrics: {}, + _dd: { + trace_id: '123', + span_id: '456' + } + }) + + expect(writer.append).to.have.been.calledOnce + }) + + it('removes problematic fields from the metadata', () => { + // problematic fields are circular references or bigints + const metadata = { + bigint: BigInt(1), + deep: { + foo: 'bar' + }, + bar: 'baz' + } + metadata.circular = metadata + metadata.deep.circular = metadata.deep + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + 
LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.metadata': metadata + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.metadata).to.deep.equal({ + bar: 'baz', + bigint: 'Unserializable value', + circular: 'Unserializable value', + deep: { foo: 'bar', circular: 'Unserializable value' } + }) + }) + + it('tags output documents for a retrieval span', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'retrieval', + '_ml_obs.meta.output.documents': [{ text: 'hello', name: 'myDoc', id: '1', score: 0.6 }] + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.output.documents).to.deep.equal([{ + text: 'hello', + name: 'myDoc', + id: '1', + score: 0.6 + }]) + }) + + it('tags input documents for an embedding span', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'embedding', + '_ml_obs.meta.input.documents': [{ text: 'hello', name: 'myDoc', id: '1', score: 0.6 }] + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.input.documents).to.deep.equal([{ + text: 'hello', + name: 'myDoc', + id: '1', + score: 0.6 + }]) + }) + + it('defaults model provider to custom', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'myModel' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta.model_provider).to.equal('custom') + }) + + it('sets an error 
appropriately', () => { + span = { + context () { + return { + _tags: { + 'error.message': 'error message', + 'error.type': 'error type', + 'error.stack': 'error stack' + }, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta['error.message']).to.equal('error message') + expect(payload.meta['error.type']).to.equal('error type') + expect(payload.meta['error.stack']).to.equal('error stack') + expect(payload.status).to.equal('error') + + expect(payload.tags).to.include('error_type:error type') + }) + + it('uses the error itself if the span does not have specific error fields', () => { + span = { + context () { + return { + _tags: { + error: new Error('error message') + }, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.meta['error.message']).to.equal('error message') + expect(payload.meta['error.type']).to.equal('Error') + expect(payload.meta['error.stack']).to.exist + expect(payload.status).to.equal('error') + + expect(payload.tags).to.include('error_type:Error') + }) + + it('uses the span name from the tag if provided', () => { + span = { + _name: 'test', + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.name': 'mySpan' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.name).to.equal('mySpan') + }) + + it('attaches session id if provided', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } 
+ } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.session_id': '1234' + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.session_id).to.equal('1234') + expect(payload.tags).to.include('session_id:1234') + }) + + it('sets span tags appropriately', () => { + span = { + context () { + return { + _tags: {}, + toTraceId () { return '123' }, + toSpanId () { return '456' } + } + } + } + + LLMObsTagger.tagMap.set(span, { + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.tags': { hostname: 'localhost', foo: 'bar', source: 'mySource' } + }) + + processor.process({ span }) + const payload = writer.append.getCall(0).firstArg + + expect(payload.tags).to.include('foo:bar') + expect(payload.tags).to.include('source:mySource') + expect(payload.tags).to.include('hostname:localhost') + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/tagger.spec.js b/packages/dd-trace/test/llmobs/tagger.spec.js new file mode 100644 index 00000000000..783ce91bdae --- /dev/null +++ b/packages/dd-trace/test/llmobs/tagger.spec.js @@ -0,0 +1,576 @@ +'use strict' + +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +function unserializbleObject () { + const obj = {} + obj.obj = obj + return obj +} + +describe('tagger', () => { + let span + let spanContext + let Tagger + let tagger + let logger + let util + + beforeEach(() => { + spanContext = { + _tags: {}, + _trace: { tags: {} } + } + + span = { + context () { return spanContext }, + setTag (k, v) { + this.context()._tags[k] = v + } + } + + util = { + generateTraceId: sinon.stub().returns('0123') + } + + logger = { + warn: sinon.stub() + } + + Tagger = proxyquire('../../src/llmobs/tagger', { + '../log': logger, + './util': util + }) + }) + + describe('without softFail', () => { + beforeEach(() => { + tagger = new Tagger({ llmobs: { enabled: true, mlApp: 'my-default-ml-app' } }) + }) + + describe('registerLLMObsSpan', () => { 
+ it('will not set tags if llmobs is not enabled', () => { + tagger = new Tagger({ llmobs: { enabled: false } }) + tagger.registerLLMObsSpan(span, 'llm') + + expect(Tagger.tagMap.get(span)).to.deep.equal(undefined) + }) + + it('tags an llm obs span with basic and default properties', () => { + tagger.registerLLMObsSpan(span, { kind: 'workflow' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'workflow', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' // no parent id provided + }) + }) + + it('uses options passed in to set tags', () => { + tagger.registerLLMObsSpan(span, { + kind: 'llm', + modelName: 'my-model', + modelProvider: 'my-provider', + sessionId: 'my-session', + mlApp: 'my-app' + }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.model_name': 'my-model', + '_ml_obs.meta.model_provider': 'my-provider', + '_ml_obs.session_id': 'my-session', + '_ml_obs.meta.ml_app': 'my-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the name if provided', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm', name: 'my-span-name' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined', + '_ml_obs.name': 'my-span-name' + }) + }) + + it('defaults parent id to undefined', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the parent span if provided to populate fields', () => { + const parentSpan = { + context () { + return { + _tags: { + '_ml_obs.meta.ml_app': 'my-ml-app', + '_ml_obs.session_id': 'my-session' + }, + toSpanId () { return '5678' } + } + } + } + tagger.registerLLMObsSpan(span, { kind: 
'llm', parent: parentSpan }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-ml-app', + '_ml_obs.session_id': 'my-session', + '_ml_obs.llmobs_parent_id': '5678' + }) + }) + + it('uses the propagated trace id if provided', () => { + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': 'undefined' + }) + }) + + it('uses the propagated parent id if provided', () => { + spanContext._trace.tags['_dd.p.llmobs_parent_id'] = '-567' + + tagger.registerLLMObsSpan(span, { kind: 'llm' }) + + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.span.kind': 'llm', + '_ml_obs.meta.ml_app': 'my-default-ml-app', + '_ml_obs.llmobs_parent_id': '-567' + }) + }) + + it('does not set span type if the LLMObs span kind is falsy', () => { + tagger.registerLLMObsSpan(span, { kind: false }) + + expect(Tagger.tagMap.get(span)).to.be.undefined + }) + }) + + describe('tagMetadata', () => { + it('tags a span with metadata', () => { + tagger._register(span) + tagger.tagMetadata(span, { a: 'foo', b: 'bar' }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.metadata': { a: 'foo', b: 'bar' } + }) + }) + }) + + describe('tagMetrics', () => { + it('tags a span with metrics', () => { + tagger._register(span) + tagger.tagMetrics(span, { a: 1, b: 2 }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.metrics': { a: 1, b: 2 } + }) + }) + + it('tags maps token metric names appropriately', () => { + tagger._register(span) + tagger.tagMetrics(span, { + inputTokens: 1, + outputTokens: 2, + totalTokens: 3, + foo: 10 + }) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.metrics': { input_tokens: 1, output_tokens: 2, total_tokens: 3, foo: 10 } + }) + }) + + it('throws for non-number entries', () => { + const metrics = { + a: 1, + b: 'foo', 
+ c: { depth: 1 }, + d: undefined + } + tagger._register(span) + expect(() => tagger.tagMetrics(span, metrics)).to.throw() + }) + }) + + describe('tagSpanTags', () => { + it('sets tags on a span', () => { + const tags = { foo: 'bar' } + tagger._register(span) + tagger.tagSpanTags(span, tags) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.tags': { foo: 'bar' } + }) + }) + + it('merges tags so they do not overwrite', () => { + Tagger.tagMap.set(span, { '_ml_obs.tags': { a: 1 } }) + const tags = { a: 2, b: 1 } + tagger.tagSpanTags(span, tags) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.tags': { a: 1, b: 1 } + }) + }) + }) + + describe('tagLLMIO', () => { + it('tags a span with llm io', () => { + const inputData = [ + 'you are an amazing assistant', + { content: 'hello! my name is foobar' }, + { content: 'I am a robot', role: 'assistant' }, + { content: 'I am a human', role: 'user' }, + {} + ] + + const outputData = 'Nice to meet you, human!' + + tagger._register(span) + tagger.tagLLMIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.messages': [ + { content: 'you are an amazing assistant' }, + { content: 'hello! my name is foobar' }, + { content: 'I am a robot', role: 'assistant' }, + { content: 'I am a human', role: 'user' }, + { content: '' } + ], + '_ml_obs.meta.output.messages': [{ content: 'Nice to meet you, human!' 
}] + }) + }) + + it('throws for a non-object message', () => { + const messages = [ + 5 + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string message content', () => { + const messages = [ + { content: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string message role', () => { + const messages = [ + { content: 'a', role: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + describe('tagging tool calls appropriately', () => { + it('tags a span with tool calls', () => { + const inputData = [ + { content: 'hello', toolCalls: [{ name: 'tool1' }, { name: 'tool2', arguments: { a: 1, b: 2 } }] }, + { content: 'goodbye', toolCalls: [{ name: 'tool3' }] } + ] + const outputData = [ + { content: 'hi', toolCalls: [{ name: 'tool4' }] } + ] + + tagger._register(span) + tagger.tagLLMIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.messages': [ + { + content: 'hello', + tool_calls: [{ name: 'tool1' }, { name: 'tool2', arguments: { a: 1, b: 2 } }] + }, { + content: 'goodbye', + tool_calls: [{ name: 'tool3' }] + }], + '_ml_obs.meta.output.messages': [{ content: 'hi', tool_calls: [{ name: 'tool4' }] }] + }) + }) + + it('throws for a non-object tool call', () => { + const messages = [ + { content: 'a', toolCalls: 5 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool name', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-object tool arguments', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', arguments: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool id', () => { + const 
messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', toolId: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('throws for a non-string tool type', () => { + const messages = [ + { content: 'a', toolCalls: [{ name: 'tool1', type: 5 }] } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + + it('logs multiple errors if there are multiple errors for a message and filters it out', () => { + const messages = [ + { content: 'a', toolCalls: [5, { name: 5, type: 7 }], role: 7 } + ] + + expect(() => tagger.tagLLMIO(span, messages, undefined)).to.throw() + }) + }) + }) + + describe('tagEmbeddingIO', () => { + it('tags a span with embedding io', () => { + const inputData = [ + 'my string document', + { text: 'my object document' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 } + ] + const outputData = 'embedded documents' + tagger._register(span) + tagger.tagEmbeddingIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.documents': [ + { text: 'my string document' }, + { text: 'my object document' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 }], + '_ml_obs.meta.output.value': 'embedded documents' + }) + }) + + it('throws for a non-object document', () => { + const documents = [ + 5 + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-string document text', () => { + const documents = [ + { text: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-string document name', () => { + const documents = [ + { text: 'a', name: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for 
a non-string document id', () => { + const documents = [ + { text: 'a', id: 5 } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + + it('throws for a non-number document score', () => { + const documents = [ + { text: 'a', score: '5' } + ] + + expect(() => tagger.tagEmbeddingIO(span, documents, undefined)).to.throw() + }) + }) + + describe('tagRetrievalIO', () => { + it('tags a span with retrieval io', () => { + const inputData = 'some query' + const outputData = [ + 'result 1', + { text: 'result 2' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 } + ] + + tagger._register(span) + tagger.tagRetrievalIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.value': 'some query', + '_ml_obs.meta.output.documents': [ + { text: 'result 1' }, + { text: 'result 2' }, + { text: 'foo', name: 'bar' }, + { text: 'baz', id: 'qux' }, + { text: 'quux', score: 5 }, + { text: 'foo', name: 'bar', id: 'qux', score: 5 }] + }) + }) + + it('throws for malformed properties on documents', () => { + const inputData = 'some query' + const outputData = [ + true, + { text: 5 }, + { text: 'foo', name: 5 }, + 'hi', + null, + undefined + ] + + // specific cases of throwing tested with embedding inputs + expect(() => tagger.tagRetrievalIO(span, inputData, outputData)).to.throw() + }) + }) + + describe('tagTextIO', () => { + it('tags a span with text io', () => { + const inputData = { some: 'object' } + const outputData = 'some text' + tagger._register(span) + tagger.tagTextIO(span, inputData, outputData) + expect(Tagger.tagMap.get(span)).to.deep.equal({ + '_ml_obs.meta.input.value': '{"some":"object"}', + '_ml_obs.meta.output.value': 'some text' + }) + }) + + it('throws when the value is not JSON serializable', () => { + const data = unserializbleObject() + expect(() => tagger.tagTextIO(span, data, 
'output')).to.throw() + }) + }) + }) + + describe('with softFail', () => { + beforeEach(() => { + tagger = new Tagger({ llmobs: { enabled: true, mlApp: 'my-default-ml-app' } }, true) + }) + + it('logs a warning when an unexpected value is encountered for text tagging', () => { + const data = unserializbleObject() + tagger._register(span) + tagger.tagTextIO(span, data, 'input') + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs a warning when an unexpected value is encountered for metrics tagging', () => { + const metrics = { + a: 1, + b: 'foo' + } + + tagger._register(span) + tagger.tagMetrics(span, metrics) + expect(logger.warn).to.have.been.calledOnce + }) + + describe('tagDocuments', () => { + it('logs a warning when a document is not an object', () => { + const data = [undefined] + tagger._register(span) + tagger.tagEmbeddingIO(span, data, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', () => { + const documents = [ + { + text: 'a', + name: 5, + id: 7, + score: '5' + } + ] + + tagger._register(span) + tagger.tagEmbeddingIO(span, documents, undefined) + expect(logger.warn.callCount).to.equal(3) + }) + }) + + describe('tagMessages', () => { + it('logs a warning when a message is not an object', () => { + const messages = [5] + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', () => { + const messages = [ + { content: 5, role: 5 } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn.callCount).to.equal(2) + }) + + describe('tool call tagging', () => { + it('logs a warning when a message tool call is not an object', () => { + const messages = [ + { content: 'a', toolCalls: 5 } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn).to.have.been.calledOnce + }) + + it('logs multiple warnings otherwise', 
() => { + const messages = [ + { + content: 'a', + toolCalls: [ + { + name: 5, + arguments: 'not an object', + toolId: 5, + type: 5 + } + ], + role: 7 + } + ] + + tagger._register(span) + tagger.tagLLMIO(span, messages, undefined) + expect(logger.warn.callCount).to.equal(5) // 4 for tool call + 1 for role + }) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/util.js b/packages/dd-trace/test/llmobs/util.js new file mode 100644 index 00000000000..4c3b76da090 --- /dev/null +++ b/packages/dd-trace/test/llmobs/util.js @@ -0,0 +1,205 @@ +'use strict' + +const chai = require('chai') + +const tracerVersion = require('../../../../package.json').version + +const MOCK_STRING = Symbol('string') +const MOCK_NUMBER = Symbol('number') +const MOCK_ANY = Symbol('any') + +// chai assertion method (callers register it with chai.Assertion.addMethod): for every key of the actual object, +// MOCK_* symbols in `expected` match by type/existence, arrays are compared after sorting copies, and nested +// objects recurse; anything else must be strictly equal +// NOTE(review): keys present only in `expected` are never visited, so a missing key passes - confirm this is intended +function deepEqualWithMockValues (expected) { + const actual = this._obj + + for (const key in actual) { + if (expected[key] === MOCK_STRING) { + new chai.Assertion(typeof actual[key], `key ${key}`).to.equal('string') + } else if (expected[key] === MOCK_NUMBER) { + new chai.Assertion(typeof actual[key], `key ${key}`).to.equal('number') + } else if (expected[key] === MOCK_ANY) { + new chai.Assertion(actual[key], `key ${key}`).to.exist + } else if (Array.isArray(expected[key])) { + const sortedExpected = [...expected[key]].sort() + const sortedActual = [...actual[key]].sort() + new chai.Assertion(sortedActual, `key: ${key}`).to.deep.equal(sortedExpected) + } else if (typeof expected[key] === 'object') { + new chai.Assertion(actual[key], `key: ${key}`).to.deepEqualWithMockValues(expected[key]) + } else { + new chai.Assertion(actual[key], `key: ${key}`).to.equal(expected[key]) + } + } +} + +function expectedLLMObsLLMSpanEvent (options) { + const spanEvent = expectedLLMObsBaseEvent(options) + + const meta = { input: {}, output: {} } + const { + spanKind, + modelName, + modelProvider, + inputMessages, + inputDocuments, + outputMessages, + outputValue, + metadata, + tokenMetrics + } = options + + if (spanKind 
=== 'llm') { + if (inputMessages) meta.input.messages = inputMessages + if (outputMessages) meta.output.messages = outputMessages + } else if (spanKind === 'embedding') { + if (inputDocuments) meta.input.documents = inputDocuments + if (outputValue) meta.output.value = outputValue + } + + if (!spanEvent.meta.input) delete spanEvent.meta.input + if (!spanEvent.meta.output) delete spanEvent.meta.output + + if (modelName) meta.model_name = modelName + if (modelProvider) meta.model_provider = modelProvider + if (metadata) meta.metadata = metadata + + Object.assign(spanEvent.meta, meta) + + if (tokenMetrics) spanEvent.metrics = tokenMetrics + + return spanEvent +} + +function expectedLLMObsNonLLMSpanEvent (options) { + const spanEvent = expectedLLMObsBaseEvent(options) + const { + spanKind, + inputValue, + outputValue, + outputDocuments, + metadata, + tokenMetrics + } = options + + const meta = { input: {}, output: {} } + if (spanKind === 'retrieval') { + if (inputValue) meta.input.value = inputValue + if (outputDocuments) meta.output.documents = outputDocuments + if (outputValue) meta.output.value = outputValue + } + if (inputValue) meta.input.value = inputValue + if (metadata) meta.metadata = metadata + if (outputValue) meta.output.value = outputValue + + if (!spanEvent.meta.input) delete spanEvent.meta.input + if (!spanEvent.meta.output) delete spanEvent.meta.output + + Object.assign(spanEvent.meta, meta) + + if (tokenMetrics) spanEvent.metrics = tokenMetrics + + return spanEvent +} + +function expectedLLMObsBaseEvent ({ + span, + parentId, + name, + spanKind, + tags, + sessionId, + error, + errorType, + errorMessage, + errorStack +} = {}) { + // the `span` could be a raw DatadogSpan or formatted span + const spanName = name || span.name || span._name + const spanId = span.span_id ? fromBuffer(span.span_id) : span.context().toSpanId() + const startNs = span.start ? fromBuffer(span.start, true) : Math.round(span._startTime * 1e6) + const duration = span.duration ? 
fromBuffer(span.duration, true) : Math.round(span._duration * 1e6) + + const spanEvent = { + trace_id: MOCK_STRING, + span_id: spanId, + parent_id: parentId || 'undefined', + name: spanName, + tags: expectedLLMObsTags({ span, tags, error, errorType, sessionId }), + start_ns: startNs, + duration, + status: error ? 'error' : 'ok', + meta: { 'span.kind': spanKind }, + metrics: {}, + _dd: { + trace_id: MOCK_STRING, + span_id: spanId + } + } + + if (sessionId) spanEvent.session_id = sessionId + + if (error) { + spanEvent.meta['error.type'] = errorType + spanEvent.meta['error.message'] = errorMessage + spanEvent.meta['error.stack'] = errorStack + } + + return spanEvent +} + +function expectedLLMObsTags ({ + span, + error, + errorType, + tags, + sessionId +}) { + tags = tags || {} + + const version = span.meta?.version || span._parentTracer?._version + const env = span.meta?.env || span._parentTracer?._env + const service = span.meta?.service || span._parentTracer?._service + + const spanTags = [ + `version:${version ?? ''}`, + `env:${env ?? ''}`, + `service:${service ?? ''}`, + 'source:integration', + `ml_app:${tags.ml_app}`, + `dd-trace.version:${tracerVersion}` + ] + + if (sessionId) spanTags.push(`session_id:${sessionId}`) + + if (error) { + spanTags.push('error:1') + if (errorType) spanTags.push(`error_type:${errorType}`) + } else { + spanTags.push('error:0') + } + + for (const [key, value] of Object.entries(tags)) { + if (!['version', 'env', 'service', 'ml_app'].includes(key)) { + spanTags.push(`${key}:${value}`) + } + } + + return spanTags +} + +function fromBuffer (spanProperty, isNumber = false) { + const { buffer, offset } = spanProperty + const strVal = buffer.readBigInt64BE(offset).toString() + return isNumber ? 
Number(strVal) : strVal +} + +module.exports = { + expectedLLMObsLLMSpanEvent, + expectedLLMObsNonLLMSpanEvent, + deepEqualWithMockValues, + MOCK_ANY, + MOCK_NUMBER, + MOCK_STRING +} diff --git a/packages/dd-trace/test/llmobs/util.spec.js b/packages/dd-trace/test/llmobs/util.spec.js new file mode 100644 index 00000000000..063e618c1ef --- /dev/null +++ b/packages/dd-trace/test/llmobs/util.spec.js @@ -0,0 +1,142 @@ +'use strict' + +const { + encodeUnicode, + getFunctionArguments, + validateKind +} = require('../../src/llmobs/util') + +describe('util', () => { + describe('encodeUnicode', () => { + it('should encode unicode characters', () => { + expect(encodeUnicode('😀')).to.equal('\\ud83d\\ude00') + }) + + it('should encode only unicode characters in a string', () => { + expect(encodeUnicode('test 😀')).to.equal('test \\ud83d\\ude00') + }) + }) + + describe('validateKind', () => { + for (const kind of ['llm', 'agent', 'task', 'tool', 'workflow', 'retrieval', 'embedding']) { + it(`should return the kind for a valid kind: ${kind}`, () => { + expect(validateKind(kind)).to.equal(kind) + }) + } + + it('should throw for an empty string', () => { + expect(() => validateKind('')).to.throw() + }) + + it('should throw for an invalid kind', () => { + expect(() => validateKind('invalid')).to.throw() + }) + + it('should throw for an undefined kind', () => { + expect(() => validateKind()).to.throw() + }) + }) + + describe('getFunctionArguments', () => { + describe('functionality', () => { + it('should return undefined for a function without arguments', () => { + expect(getFunctionArguments(() => {})).to.deep.equal(undefined) + }) + + it('should capture a single argument only by its value', () => { + expect(getFunctionArguments((arg) => {}, ['bar'])).to.deep.equal('bar') + }) + + it('should capture multiple arguments by name', () => { + expect(getFunctionArguments((foo, bar) => {}, ['foo', 'bar'])).to.deep.equal({ foo: 'foo', bar: 'bar' }) + }) + + it('should ignore arguments not 
passed in', () => { + expect(getFunctionArguments((foo, bar, baz) => {}, ['foo', 'bar'])).to.deep.equal({ foo: 'foo', bar: 'bar' }) + }) + + it('should capture spread arguments', () => { + expect( + getFunctionArguments((foo, bar, ...args) => {}, ['foo', 'bar', 1, 2, 3]) + ).to.deep.equal({ foo: 'foo', bar: 'bar', args: [1, 2, 3] }) + }) + }) + + describe('parsing configurations', () => { + it('should parse multiple arguments with single-line comments', () => { + function foo ( + bar, // bar comment + baz // baz comment + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('should parse multiple arguments with multi-line comments', () => { + function foo ( + bar, /* bar comment */ + baz /* baz comment */ + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('should parse multiple arguments with stacked multi-line comments', () => { + function foo ( + /** + * hello + */ + bar, + /** + * world + */ + baz + ) {} + + expect(getFunctionArguments(foo, ['bar', 'baz'])).to.deep.equal({ bar: 'bar', baz: 'baz' }) + }) + + it('parses when simple default values are present', () => { + function foo (bar = 'baz') {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('should ignore the default value when no argument is passed', () => { + function foo (bar = 'baz') {} + + expect(getFunctionArguments(foo, [])).to.deep.equal(undefined) + }) + + it('parses when a default value is a function', () => { + function foo (bar = () => {}, baz = 4) {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('parses when a simple object is passed in', () => { + function foo (bar = { baz: 4 }) {} + + expect(getFunctionArguments(foo, ['bar'])).to.deep.equal('bar') + }) + + it('parses when a complex object is passed in', () => { + function foo (bar = { baz: { a: 5, b: { c: 4 } }, bat: 0 }, baz) {} + + 
expect(getFunctionArguments(foo, [{ bar: 'baz' }, 'baz'])).to.deep.equal({ bar: { bar: 'baz' }, baz: 'baz' }) + }) + + it('parses when one of the arguments is an arrow function', () => { + function foo (fn = (a, b, c) => {}, ctx) {} + + expect(getFunctionArguments(foo, ['fn', 'ctx'])).to.deep.equal({ fn: 'fn', ctx: 'ctx' }) + }) + + it('parses when one of the arguments is a function', () => { + function foo (fn = function (a, b, c) {}, ctx) {} + + expect(getFunctionArguments(foo, ['fn', 'ctx'])).to.deep.equal({ fn: 'fn', ctx: 'ctx' }) + }) + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/base.spec.js b/packages/dd-trace/test/llmobs/writers/base.spec.js new file mode 100644 index 00000000000..8b971b2748a --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/base.spec.js @@ -0,0 +1,179 @@ +'use strict' +const { expect } = require('chai') +const proxyquire = require('proxyquire') + +describe('BaseLLMObsWriter', () => { + let BaseLLMObsWriter + let writer + let request + let clock + let options + let logger + + beforeEach(() => { + request = sinon.stub() + logger = { + debug: sinon.stub(), + warn: sinon.stub(), + error: sinon.stub() + } + BaseLLMObsWriter = proxyquire('../../../src/llmobs/writers/base', { + '../../exporters/common/request': request, + '../../log': logger + }) + + clock = sinon.useFakeTimers() + + options = { + endpoint: '/api/v2/llmobs', + intake: 'llmobs-intake.datadoghq.com' + } + }) + + afterEach(() => { + clock.restore() + process.removeAllListeners('beforeExit') + }) + + it('constructs a writer with a url', () => { + writer = new BaseLLMObsWriter(options) + + expect(writer._url.href).to.equal('https://llmobs-intake.datadoghq.com/api/v2/llmobs') + expect(logger.debug).to.have.been.calledWith( + 'Started BaseLLMObsWriter to https://llmobs-intake.datadoghq.com/api/v2/llmobs' + ) + }) + + it('calls flush before the process exits', () => { + writer = new BaseLLMObsWriter(options) + writer.flush = sinon.spy() + + 
process.emit('beforeExit') + + expect(writer.flush).to.have.been.calledOnce + }) + + it('calls flush at the correct interval', async () => { + writer = new BaseLLMObsWriter(options) + + writer.flush = sinon.spy() + + clock.tick(1000) + + expect(writer.flush).to.have.been.calledOnce + }) + + it('appends an event to the buffer', () => { + writer = new BaseLLMObsWriter(options) + const event = { foo: 'bar–' } + writer.append(event) + + expect(writer._buffer).to.have.lengthOf(1) + expect(writer._buffer[0]).to.deep.equal(event) + expect(writer._bufferSize).to.equal(16) + }) + + it('does not append an event if the buffer is full', () => { + writer = new BaseLLMObsWriter(options) + + for (let i = 0; i < 1000; i++) { + writer.append({ foo: 'bar' }) + } + + writer.append({ foo: 'bar' }) + expect(writer._buffer).to.have.lengthOf(1000) + expect(logger.warn).to.have.been.calledWith('BaseLLMObsWriter event buffer full (limit is 1000), dropping event') + }) + + it('flushes the buffer', () => { + writer = new BaseLLMObsWriter(options) + + const event1 = { foo: 'bar' } + const event2 = { foo: 'baz' } + + writer.append(event1) + writer.append(event2) + + writer.makePayload = (events) => ({ events }) + + // Stub the request function to call its third argument + request.callsFake((url, options, callback) => { + callback(null, null, 202) + }) + + writer.flush() + + expect(request).to.have.been.calledOnce + const calledArgs = request.getCall(0).args + + expect(calledArgs[0]).to.deep.equal(JSON.stringify({ events: [event1, event2] })) + expect(calledArgs[1]).to.deep.equal({ + headers: { + 'Content-Type': 'application/json' + }, + method: 'POST', + url: writer._url, + timeout: 5000 + }) + + expect(logger.debug).to.have.been.calledWith( + 'Sent 2 LLMObs undefined events to https://llmobs-intake.datadoghq.com/api/v2/llmobs' + ) + + expect(writer._buffer).to.have.lengthOf(0) + expect(writer._bufferSize).to.equal(0) + }) + + it('does not flush an empty buffer', () => { + writer = new 
BaseLLMObsWriter(options) + writer.flush() + + expect(request).to.not.have.been.called + }) + + it('logs errors from the request', () => { + writer = new BaseLLMObsWriter(options) + writer.makePayload = (events) => ({ events }) + + writer.append({ foo: 'bar' }) + + const error = new Error('boom') + request.callsFake((url, options, callback) => { + callback(error) + }) + + writer.flush() + + expect(logger.error).to.have.been.calledWith( + 'Error sending 1 LLMObs undefined events to https://llmobs-intake.datadoghq.com/api/v2/llmobs: boom' + ) + }) + + describe('destroy', () => { + it('destroys the writer', () => { + sinon.spy(global, 'clearInterval') + sinon.spy(process, 'removeListener') + writer = new BaseLLMObsWriter(options) + writer.flush = sinon.stub() + + writer.destroy() + + expect(writer._destroyed).to.be.true + expect(clearInterval).to.have.been.calledWith(writer._periodic) + expect(process.removeListener).to.have.been.calledWith('beforeExit', writer.destroy) + expect(writer.flush).to.have.been.calledOnce + expect(logger.debug) + .to.have.been.calledWith('Stopping BaseLLMObsWriter') + }) + + it('does not destroy more than once', () => { + writer = new BaseLLMObsWriter(options) + + logger.debug.reset() // ignore log from constructor + writer.destroy() + writer.destroy() + + expect(logger.debug).to.have.been.calledOnce + }) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/evaluations.spec.js b/packages/dd-trace/test/llmobs/writers/evaluations.spec.js new file mode 100644 index 00000000000..e81955450c4 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/evaluations.spec.js @@ -0,0 +1,46 @@ +'use strict' + +describe('LLMObsEvalMetricsWriter', () => { + let LLMObsEvalMetricsWriter + let writer + let flush + + beforeEach(() => { + LLMObsEvalMetricsWriter = require('../../../src/llmobs/writers/evaluations') + flush = sinon.stub() + }) + + afterEach(() => { + process.removeAllListeners('beforeExit') + }) + + it('constructs the writer with the 
correct values', () => { + writer = new LLMObsEvalMetricsWriter({ + site: 'datadoghq.com', + llmobs: {}, + apiKey: '1234' + }) + + writer.flush = flush // just to stop the beforeExit flush call + + expect(writer._url.href).to.equal('https://api.datadoghq.com/api/intake/llm-obs/v1/eval-metric') + expect(writer._headers['DD-API-KEY']).to.equal('1234') + expect(writer._eventType).to.equal('evaluation_metric') + }) + + it('builds the payload correctly', () => { + writer = new LLMObsEvalMetricsWriter({ + site: 'datadoghq.com', + apiKey: 'test' + }) + + const events = [ + { name: 'test', value: 1 } + ] + + const payload = writer.makePayload(events) + + expect(payload.data.type).to.equal('evaluation_metric') + expect(payload.data.attributes.metrics).to.deep.equal(events) + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js b/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js new file mode 100644 index 00000000000..6ed0f150885 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/agentProxy.spec.js @@ -0,0 +1,28 @@ +'use strict' + +describe('LLMObsAgentProxySpanWriter', () => { + let LLMObsAgentProxySpanWriter + let writer + + beforeEach(() => { + LLMObsAgentProxySpanWriter = require('../../../../src/llmobs/writers/spans/agentProxy') + }) + + it('is initialized correctly', () => { + writer = new LLMObsAgentProxySpanWriter({ + hostname: '127.0.0.1', + port: 8126 + }) + + expect(writer._url.href).to.equal('http://127.0.0.1:8126/evp_proxy/v2/api/v2/llmobs') + expect(writer._headers['X-Datadog-EVP-Subdomain']).to.equal('llmobs-intake') + }) + + it('is initialized correctly with default hostname', () => { + writer = new LLMObsAgentProxySpanWriter({ + port: 8126 // port will always be defaulted by config + }) + + expect(writer._url.href).to.equal('http://localhost:8126/evp_proxy/v2/api/v2/llmobs') + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js 
b/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js new file mode 100644 index 00000000000..e3cf421a3ed --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/agentless.spec.js @@ -0,0 +1,21 @@ +'use strict' + +describe('LLMObsAgentlessSpanWriter', () => { + let LLMObsAgentlessSpanWriter + let writer + + beforeEach(() => { + LLMObsAgentlessSpanWriter = require('../../../../src/llmobs/writers/spans/agentless') + }) + + it('is initialized correctly', () => { + writer = new LLMObsAgentlessSpanWriter({ + site: 'datadoghq.com', + llmobs: {}, + apiKey: '1234' + }) + + expect(writer._url.href).to.equal('https://llmobs-intake.datadoghq.com/api/v2/llmobs') + expect(writer._headers['DD-API-KEY']).to.equal('1234') + }) +}) diff --git a/packages/dd-trace/test/llmobs/writers/spans/base.spec.js b/packages/dd-trace/test/llmobs/writers/spans/base.spec.js new file mode 100644 index 00000000000..1c9965cd9c2 --- /dev/null +++ b/packages/dd-trace/test/llmobs/writers/spans/base.spec.js @@ -0,0 +1,99 @@ +'use strict' + +const proxyquire = require('proxyquire') + +describe('LLMObsSpanWriter', () => { + let LLMObsSpanWriter + let writer + let options + let logger + + beforeEach(() => { + logger = { + warn: sinon.stub(), + debug: sinon.stub() + } + LLMObsSpanWriter = proxyquire('../../../../src/llmobs/writers/spans/base', { + '../../../log': logger + }) + options = { + endpoint: '/api/v2/llmobs', + intake: 'llmobs-intake.datadoghq.com' + } + }) + + afterEach(() => { + process.removeAllListeners('beforeExit') + }) + + it('is initialized correctly', () => { + writer = new LLMObsSpanWriter(options) + + expect(writer._eventType).to.equal('span') + }) + + it('computes the number of bytes of the appended event', () => { + writer = new LLMObsSpanWriter(options) + + const event = { name: 'test', value: 1 } + const eventSizeBytes = Buffer.from(JSON.stringify(event)).byteLength + + writer.append(event) + + expect(writer._bufferSize).to.equal(eventSizeBytes) + }) + + it('truncates 
the event if it exceeds the size limit', () => { + writer = new LLMObsSpanWriter(options) + + const event = { + name: 'test', + meta: { + input: { value: 'a'.repeat(1024 * 1024) }, + output: { value: 'a'.repeat(1024 * 1024) } + } + } + + writer.append(event) + + const bufferEvent = writer._buffer[0] + expect(bufferEvent).to.deep.equal({ + name: 'test', + meta: { + input: { value: "[This value has been dropped because this span's size exceeds the 1MB size limit.]" }, + output: { value: "[This value has been dropped because this span's size exceeds the 1MB size limit.]" } + }, + collection_errors: ['dropped_io'] + }) + }) + + it('flushes the queue if the next event will exceed the payload limit', () => { + writer = new LLMObsSpanWriter(options) + writer.flush = sinon.stub() + + writer._bufferSize = (5 << 20) - 1 + writer._buffer = Array.from({ length: 10 }) + const event = { name: 'test', value: 'a'.repeat(1024) } + + writer.append(event) + + expect(writer.flush).to.have.been.calledOnce + expect(logger.debug).to.have.been.calledWith( + 'Flusing queue because queing next event will exceed EvP payload limit' + ) + }) + + it('creates the payload correctly', () => { + writer = new LLMObsSpanWriter(options) + + const events = [ + { name: 'test', value: 1 } + ] + + const payload = writer.makePayload(events) + + expect(payload['_dd.stage']).to.equal('raw') + expect(payload.event_type).to.equal('span') + expect(payload.spans).to.deep.equal(events) + }) +}) diff --git a/packages/dd-trace/test/opentracing/propagation/text_map.spec.js b/packages/dd-trace/test/opentracing/propagation/text_map.spec.js index 58ee69047ba..45ddc905ee4 100644 --- a/packages/dd-trace/test/opentracing/propagation/text_map.spec.js +++ b/packages/dd-trace/test/opentracing/propagation/text_map.spec.js @@ -46,7 +46,8 @@ describe('TextMapPropagator', () => { textMap = { 'x-datadog-trace-id': '123', 'x-datadog-parent-id': '456', - 'ot-baggage-foo': 'bar' + 'ot-baggage-foo': 'bar', + baggage: 'foo=bar' } 
baggageItems = {} }) @@ -58,6 +59,16 @@ describe('TextMapPropagator', () => { } }) + it('should not crash without spanContext', () => { + const carrier = {} + propagator.inject(null, carrier) + }) + + it('should not crash without carrier', () => { + const spanContext = createContext() + propagator.inject(spanContext, null) + }) + it('should inject the span context into the carrier', () => { const carrier = {} const spanContext = createContext() @@ -67,18 +78,18 @@ describe('TextMapPropagator', () => { expect(carrier).to.have.property('x-datadog-trace-id', '123') expect(carrier).to.have.property('x-datadog-parent-id', '456') expect(carrier).to.have.property('ot-baggage-foo', 'bar') + expect(carrier).to.have.property('baggage', 'foo=bar') }) it('should handle non-string values', () => { const carrier = {} - const spanContext = createContext({ - baggageItems: { - number: 1.23, - bool: true, - array: ['foo', 'bar'], - object: {} - } - }) + const baggageItems = { + number: 1.23, + bool: true, + array: ['foo', 'bar'], + object: {} + } + const spanContext = createContext({ baggageItems }) propagator.inject(spanContext, carrier) @@ -86,6 +97,43 @@ describe('TextMapPropagator', () => { expect(carrier['ot-baggage-bool']).to.equal('true') expect(carrier['ot-baggage-array']).to.equal('foo,bar') expect(carrier['ot-baggage-object']).to.equal('[object Object]') + expect(carrier.baggage).to.be.equal('number=1.23,bool=true,array=foo%2Cbar,object=%5Bobject%20Object%5D') + }) + + it('should handle special characters in baggage', () => { + const carrier = {} + const baggageItems = { + '",;\\()/:<=>?@[]{}🐶é我': '",;\\🐶é我' + } + const spanContext = createContext({ baggageItems }) + + propagator.inject(spanContext, carrier) + // eslint-disable-next-line max-len + expect(carrier.baggage).to.be.equal('%22%2C%3B%5C%28%29%2F%3A%3C%3D%3E%3F%40%5B%5D%7B%7D%F0%9F%90%B6%C3%A9%E6%88%91=%22%2C%3B%5C%F0%9F%90%B6%C3%A9%E6%88%91') + }) + + it('should drop excess baggage items when there 
are too many pairs', () => { + const carrier = {} + const baggageItems = {} + for (let i = 0; i < config.baggageMaxItems + 1; i++) { + baggageItems[`key-${i}`] = i + } + const spanContext = createContext({ baggageItems }) + + propagator.inject(spanContext, carrier) + expect(carrier.baggage.split(',').length).to.equal(config.baggageMaxItems) + }) + + it('should drop excess baggage items when the resulting baggage header contains many bytes', () => { + const carrier = {} + const baggageItems = { + raccoon: 'chunky', + foo: Buffer.alloc(config.baggageMaxBytes).toString() + } + const spanContext = createContext({ baggageItems }) + + propagator.inject(spanContext, carrier) + expect(carrier.baggage).to.equal('raccoon=chunky') }) it('should inject an existing sampling priority', () => { @@ -353,9 +401,57 @@ describe('TextMapPropagator', () => { expect(spanContext.toTraceId()).to.equal(carrier['x-datadog-trace-id']) expect(spanContext.toSpanId()).to.equal(carrier['x-datadog-parent-id']) expect(spanContext._baggageItems.foo).to.equal(carrier['ot-baggage-foo']) + expect(spanContext._baggageItems).to.deep.equal({ foo: 'bar' }) expect(spanContext._isRemote).to.equal(true) }) + it('should extract otel baggage items with special characters', () => { + process.env.DD_TRACE_BAGGAGE_ENABLED = true + config = new Config() + propagator = new TextMapPropagator(config) + const carrier = { + 'x-datadog-trace-id': '123', + 'x-datadog-parent-id': '456', + baggage: '%22%2C%3B%5C%28%29%2F%3A%3C%3D%3E%3F%40%5B%5D%7B%7D=%22%2C%3B%5C' + } + const spanContext = propagator.extract(carrier) + expect(spanContext._baggageItems).to.deep.equal({ '",;\\()/:<=>?@[]{}': '",;\\' }) + }) + + it('should not extract baggage when the header is malformed', () => { + const carrierA = { + 'x-datadog-trace-id': '123', + 'x-datadog-parent-id': '456', + baggage: 'no-equal-sign,foo=gets-dropped-because-previous-pair-is-malformed' + } + const spanContextA = propagator.extract(carrierA) + 
expect(spanContextA._baggageItems).to.deep.equal({}) + + const carrierB = { + 'x-datadog-trace-id': '123', + 'x-datadog-parent-id': '456', + baggage: 'foo=gets-dropped-because-subsequent-pair-is-malformed,=' + } + const spanContextB = propagator.extract(carrierB) + expect(spanContextB._baggageItems).to.deep.equal({}) + + const carrierC = { + 'x-datadog-trace-id': '123', + 'x-datadog-parent-id': '456', + baggage: '=no-key' + } + const spanContextC = propagator.extract(carrierC) + expect(spanContextC._baggageItems).to.deep.equal({}) + + const carrierD = { + 'x-datadog-trace-id': '123', + 'x-datadog-parent-id': '456', + baggage: 'no-value=' + } + const spanContextD = propagator.extract(carrierD) + expect(spanContextD._baggageItems).to.deep.equal({}) + }) + it('should convert signed IDs to unsigned', () => { textMap['x-datadog-trace-id'] = '-123' textMap['x-datadog-parent-id'] = '-456' @@ -492,6 +588,12 @@ describe('TextMapPropagator', () => { expect(first._spanId.toString(16)).to.equal(spanId) }) + it('should not crash with invalid traceparent', () => { + textMap.traceparent = 'invalid' + + propagator.extract(textMap) + }) + it('should always extract tracestate from tracecontext when trace IDs match', () => { textMap.traceparent = '00-0000000000000000000000000000007B-0000000000000456-01' textMap.tracestate = 'other=bleh,dd=t.foo_bar_baz_:abc_!@#$%^&*()_+`-~;s:2;o:foo;t.dm:-4' diff --git a/packages/dd-trace/test/opentracing/span.spec.js b/packages/dd-trace/test/opentracing/span.spec.js index dbb248eb920..87d22114aa1 100644 --- a/packages/dd-trace/test/opentracing/span.spec.js +++ b/packages/dd-trace/test/opentracing/span.spec.js @@ -346,6 +346,40 @@ describe('Span', () => { }) }) + describe('getAllBaggageItems', () => { + it('should get all baggage items', () => { + span = new Span(tracer, processor, prioritySampler, { operationName: 'operation' }) + expect(span.getAllBaggageItems()).to.equal(JSON.stringify({})) + + span._spanContext._baggageItems.foo = 'bar' + 
span._spanContext._baggageItems.raccoon = 'cute' + expect(span.getAllBaggageItems()).to.equal(JSON.stringify({ + foo: 'bar', + raccoon: 'cute' + })) + }) + }) + + describe('removeBaggageItem', () => { + it('should remove a baggage item', () => { + span = new Span(tracer, processor, prioritySampler, { operationName: 'operation' }) + span._spanContext._baggageItems.foo = 'bar' + expect(span.getBaggageItem('foo')).to.equal('bar') + span.removeBaggageItem('foo') + expect(span.getBaggageItem('foo')).to.be.undefined + }) + }) + + describe('removeAllBaggageItems', () => { + it('should remove all baggage items', () => { + span = new Span(tracer, processor, prioritySampler, { operationName: 'operation' }) + span._spanContext._baggageItems.foo = 'bar' + span._spanContext._baggageItems.raccoon = 'cute' + span.removeAllBaggageItems() + expect(span._spanContext._baggageItems).to.deep.equal({}) + }) + }) + describe('setTag', () => { it('should set a tag', () => { span = new Span(tracer, processor, prioritySampler, { operationName: 'operation' }) diff --git a/packages/dd-trace/test/plugins/externals.json b/packages/dd-trace/test/plugins/externals.json index 9eee111433c..511970f728e 100644 --- a/packages/dd-trace/test/plugins/externals.json +++ b/packages/dd-trace/test/plugins/externals.json @@ -47,6 +47,18 @@ "versions": [">=3"] } ], + "body-parser": [ + { + "name": "express", + "versions": ["^4"] + } + ], + "cookie-parser": [ + { + "name": "express", + "versions": ["^4"] + } + ], "cypress": [ { "name": "cypress", @@ -69,6 +81,10 @@ { "name": "request", "versions": ["2.88.2"] + }, + { + "name": "multer", + "versions": ["^1.4.4-lts.1"] } ], "express-mongo-sanitize": [ diff --git a/packages/dd-trace/test/plugins/helpers.js b/packages/dd-trace/test/plugins/helpers.js index b35793b6664..add1361e167 100644 --- a/packages/dd-trace/test/plugins/helpers.js +++ b/packages/dd-trace/test/plugins/helpers.js @@ -117,11 +117,16 @@ function unbreakThen (promise) { } } +function 
getNextLineNumber () { + return Number(new Error().stack.split('\n')[2].match(/:(\d+):/)[1]) + 1 +} + module.exports = { breakThen, compare, deepInclude, expectSomeSpan, + getNextLineNumber, resolveNaming, unbreakThen, withDefaults diff --git a/packages/dd-trace/test/plugins/outbound.spec.js b/packages/dd-trace/test/plugins/outbound.spec.js index 5709c789575..2d801cd1f4c 100644 --- a/packages/dd-trace/test/plugins/outbound.spec.js +++ b/packages/dd-trace/test/plugins/outbound.spec.js @@ -3,7 +3,9 @@ require('../setup/tap') const { expect } = require('chai') +const { getNextLineNumber } = require('./helpers') const OutboundPlugin = require('../../src/plugins/outbound') +const parseTags = require('../../../datadog-core/src/utils/src/parse-tags') describe('OuboundPlugin', () => { describe('peer service decision', () => { @@ -157,4 +159,50 @@ describe('OuboundPlugin', () => { }) }) }) + + describe('code origin tags', () => { + let instance = null + + beforeEach(() => { + const tracerStub = { + _tracer: { + startSpan: sinon.stub().returns({ + addTags: sinon.spy() + }) + } + } + instance = new OutboundPlugin(tracerStub) + }) + + it('should not add exit tags to span if codeOriginForSpans.enabled is false', () => { + sinon.stub(instance, '_tracerConfig').value({ codeOriginForSpans: { enabled: false } }) + const span = instance.startSpan('test') + expect(span.addTags).to.not.have.been.called + }) + + it('should add exit tags to span if codeOriginForSpans.enabled is true', () => { + sinon.stub(instance, '_tracerConfig').value({ codeOriginForSpans: { enabled: true } }) + + const lineNumber = String(getNextLineNumber()) + const span = instance.startSpan('test') + + expect(span.addTags).to.have.been.calledOnce + const args = span.addTags.args[0] + expect(args).to.have.property('length', 1) + const tags = parseTags(args[0]) + + expect(tags).to.nested.include({ '_dd.code_origin.type': 'exit' }) + 
expect(tags._dd.code_origin).to.have.property('frames').to.be.an('array').with.length.above(0) + + for (const frame of tags._dd.code_origin.frames) { + expect(frame).to.have.property('file', __filename) + expect(frame).to.have.property('line').to.match(/^\d+$/) + expect(frame).to.have.property('column').to.match(/^\d+$/) + expect(frame).to.have.property('type').to.a('string') + } + + const topFrame = tags._dd.code_origin.frames[0] + expect(topFrame).to.have.property('line', lineNumber) + }) + }) }) diff --git a/packages/dd-trace/test/plugins/util/inferred_proxy.spec.js b/packages/dd-trace/test/plugins/util/inferred_proxy.spec.js new file mode 100644 index 00000000000..78a8443c91c --- /dev/null +++ b/packages/dd-trace/test/plugins/util/inferred_proxy.spec.js @@ -0,0 +1,260 @@ +'use strict' + +require('../../setup/tap') + +const agent = require('../agent') +const getPort = require('get-port') +const { expect } = require('chai') +const axios = require('axios') + +describe('Inferred Proxy Spans', function () { + let http + let appListener + let controller + let port + + // tap was throwing timeout errors when trying to use hooks like `before`, so instead we just use this function + // and call before the test starts + const loadTest = async function (options) { + process.env.DD_SERVICE = 'aws-server' + process.env.DD_TRACE_INFERRED_PROXY_SERVICES_ENABLED = 'true' + + port = await getPort() + require('../../../../dd-trace') + + await agent.load(['http'], null, options) + + http = require('http') + + const server = new http.Server(async (req, res) => { + controller && await controller(req, res) + if (req.url === '/error') { + res.statusCode = 500 + res.end(JSON.stringify({ message: 'ERROR' })) + } else { + res.writeHead(200) + res.end(JSON.stringify({ message: 'OK' })) + } + }) + + appListener = server.listen(port, '127.0.0.1') + } + + // test cleanup function + const cleanupTest = function () { + appListener && appListener.close() + try { + agent.close({ ritmReset: 
false }) + } catch { + // pass + } + } + + const inferredHeaders = { + 'x-dd-proxy': 'aws-apigateway', + 'x-dd-proxy-request-time-ms': '1729780025473', + 'x-dd-proxy-path': '/test', + 'x-dd-proxy-httpmethod': 'GET', + 'x-dd-proxy-domain-name': 'example.com', + 'x-dd-proxy-stage': 'dev' + } + + describe('without configuration', () => { + it('should create a parent span and a child span for a 200', async () => { + await loadTest({}) + + await axios.get(`http://127.0.0.1:${port}/`, { + headers: inferredHeaders + }) + + await agent.use(traces => { + for (const trace of traces) { + try { + const spans = trace + + expect(spans.length).to.be.equal(2) + + expect(spans[0]).to.have.property('name', 'aws.apigateway') + expect(spans[0]).to.have.property('service', 'example.com') + expect(spans[0]).to.have.property('resource', 'GET /test') + expect(spans[0]).to.have.property('type', 'web') + expect(spans[0].meta).to.have.property('http.url', 'example.com/test') + expect(spans[0].meta).to.have.property('http.method', 'GET') + expect(spans[0].meta).to.have.property('http.status_code', '200') + expect(spans[0].meta).to.have.property('http.route', '/test') + expect(spans[0].meta).to.have.property('span.kind', 'internal') + expect(spans[0].meta).to.have.property('component', 'aws-apigateway') + expect(spans[0].meta).to.have.property('_dd.inferred_span', '1') + expect(spans[0].start.toString()).to.be.equal('1729780025472999936') + + expect(spans[0].span_id.toString()).to.be.equal(spans[1].parent_id.toString()) + + expect(spans[1]).to.have.property('name', 'web.request') + expect(spans[1]).to.have.property('service', 'aws-server') + expect(spans[1]).to.have.property('type', 'web') + expect(spans[1]).to.have.property('resource', 'GET') + expect(spans[1].meta).to.have.property('component', 'http') + expect(spans[1].meta).to.have.property('span.kind', 'server') + expect(spans[1].meta).to.have.property('http.url', `http://127.0.0.1:${port}/`) + 
expect(spans[1].meta).to.have.property('http.method', 'GET') + expect(spans[1].meta).to.have.property('http.status_code', '200') + expect(spans[1].meta).to.have.property('span.kind', 'server') + break + } catch { + continue + } + } + }).then(cleanupTest).catch(cleanupTest) + }) + + it('should create a parent span and a child span for an error', async () => { + await loadTest({}) + + await axios.get(`http://127.0.0.1:${port}/error`, { + headers: inferredHeaders, + validateStatus: function (status) { + return status === 500 + } + }) + + await agent.use(traces => { + for (const trace of traces) { + try { + const spans = trace + expect(spans.length).to.be.equal(2) + + expect(spans[0]).to.have.property('name', 'aws.apigateway') + expect(spans[0]).to.have.property('service', 'example.com') + expect(spans[0]).to.have.property('resource', 'GET /test') + expect(spans[0]).to.have.property('type', 'web') + expect(spans[0].meta).to.have.property('http.url', 'example.com/test') + expect(spans[0].meta).to.have.property('http.method', 'GET') + expect(spans[0].meta).to.have.property('http.status_code', '500') + expect(spans[0].meta).to.have.property('http.route', '/test') + expect(spans[0].meta).to.have.property('span.kind', 'internal') + expect(spans[0].meta).to.have.property('component', 'aws-apigateway') + expect(spans[0].error).to.be.equal(1) + expect(spans[0].start.toString()).to.be.equal('1729780025472999936') + expect(spans[0].span_id.toString()).to.be.equal(spans[1].parent_id.toString()) + + expect(spans[1]).to.have.property('name', 'web.request') + expect(spans[1]).to.have.property('service', 'aws-server') + expect(spans[1]).to.have.property('type', 'web') + expect(spans[1]).to.have.property('resource', 'GET') + expect(spans[1].meta).to.have.property('component', 'http') + expect(spans[1].meta).to.have.property('span.kind', 'server') + expect(spans[1].meta).to.have.property('http.url', `http://127.0.0.1:${port}/error`) + 
expect(spans[1].meta).to.have.property('http.method', 'GET') + expect(spans[1].meta).to.have.property('http.status_code', '500') + expect(spans[1].meta).to.have.property('span.kind', 'server') + expect(spans[1].error).to.be.equal(1) + break + } catch { + continue + } + } + }).then(cleanupTest).catch(cleanupTest) + }) + + it('should not create an API Gateway span if all necessary headers are missing', async () => { + await loadTest({}) + + await axios.get(`http://127.0.0.1:${port}/no-aws-headers`, { + headers: {} + }) + + await agent.use(traces => { + for (const trace of traces) { + try { + const spans = trace + expect(spans.length).to.be.equal(1) + + expect(spans[0]).to.have.property('name', 'web.request') + expect(spans[0]).to.have.property('service', 'aws-server') + expect(spans[0]).to.have.property('type', 'web') + expect(spans[0]).to.have.property('resource', 'GET') + expect(spans[0].meta).to.have.property('component', 'http') + expect(spans[0].meta).to.have.property('span.kind', 'server') + expect(spans[0].meta).to.have.property('http.url', `http://127.0.0.1:${port}/no-aws-headers`) + expect(spans[0].meta).to.have.property('http.method', 'GET') + expect(spans[0].meta).to.have.property('http.status_code', '200') + expect(spans[0].meta).to.have.property('span.kind', 'server') + expect(spans[0].error).to.be.equal(0) + break + } catch { + continue + } + } + }).then(cleanupTest).catch(cleanupTest) + }) + + it('should not create an API Gateway span if missing the proxy system header', async () => { + await loadTest({}) + + // remove x-dd-proxy from headers + const { 'x-dd-proxy': _, ...newHeaders } = inferredHeaders + + await axios.get(`http://127.0.0.1:${port}/a-few-aws-headers`, { + headers: newHeaders + }) + + await agent.use(traces => { + for (const trace of traces) { + try { + const spans = trace + expect(spans.length).to.be.equal(1) + + expect(spans[0]).to.have.property('name', 'web.request') + expect(spans[0]).to.have.property('service', 'aws-server') + 
expect(spans[0]).to.have.property('type', 'web') + expect(spans[0]).to.have.property('resource', 'GET') + expect(spans[0].meta).to.have.property('component', 'http') + expect(spans[0].meta).to.have.property('span.kind', 'server') + expect(spans[0].meta).to.have.property('http.url', `http://127.0.0.1:${port}/a-few-aws-headers`) + expect(spans[0].meta).to.have.property('http.method', 'GET') + expect(spans[0].meta).to.have.property('http.status_code', '200') + expect(spans[0].meta).to.have.property('span.kind', 'server') + expect(spans[0].error).to.be.equal(0) + break + } catch { + continue + } + } + }).then(cleanupTest).catch(cleanupTest) + }) + }) + + describe('with configuration', function () { + it('should not create a span when configured to be off', async () => { + await loadTest({ inferredProxyServicesEnabled: false }) + + await axios.get(`http://127.0.0.1:${port}/configured-off`, { + headers: inferredHeaders + }) + + await agent.use(traces => { + for (const trace of traces) { + try { + const spans = trace + + expect(spans.length).to.be.equal(1) + + expect(spans[0]).to.have.property('name', 'web.request') + expect(spans[0]).to.have.property('service', 'aws-server') + expect(spans[0]).to.have.property('type', 'web') + expect(spans[0]).to.have.property('resource', 'GET') + expect(spans[0].meta).to.have.property('component', 'http') + expect(spans[0].meta).to.have.property('span.kind', 'server') + expect(spans[0].meta).to.have.property('http.url', `http://127.0.0.1:${port}/configured-off`) + expect(spans[0].meta).to.have.property('http.method', 'GET') + expect(spans[0].meta).to.have.property('http.status_code', '200') + expect(spans[0].meta).to.have.property('span.kind', 'server') + break + } catch { + continue + } + } + }).then(cleanupTest).catch(cleanupTest) + }) + }) +}) diff --git a/packages/dd-trace/test/plugins/util/stacktrace.spec.js b/packages/dd-trace/test/plugins/util/stacktrace.spec.js index 3fefc2b29ef..a96ed87f965 100644 --- 
a/packages/dd-trace/test/plugins/util/stacktrace.spec.js +++ b/packages/dd-trace/test/plugins/util/stacktrace.spec.js @@ -1,6 +1,7 @@ 'use strict' const { isAbsolute } = require('path') +const { getNextLineNumber } = require('../helpers') require('../../setup/tap') @@ -62,7 +63,3 @@ describe('stacktrace utils', () => { }) }) }) - -function getNextLineNumber () { - return Number(new Error().stack.split('\n')[2].match(/:(\d+):/)[1]) + 1 -} diff --git a/packages/dd-trace/test/priority_sampler.spec.js b/packages/dd-trace/test/priority_sampler.spec.js index 5000d81ff09..88c134a5758 100644 --- a/packages/dd-trace/test/priority_sampler.spec.js +++ b/packages/dd-trace/test/priority_sampler.spec.js @@ -11,7 +11,8 @@ const { SAMPLING_MECHANISM_MANUAL, SAMPLING_MECHANISM_REMOTE_USER, SAMPLING_MECHANISM_REMOTE_DYNAMIC, - DECISION_MAKER_KEY + DECISION_MAKER_KEY, + SAMPLING_MECHANISM_APPSEC } = require('../src/constants') const SERVICE_NAME = ext.tags.SERVICE_NAME @@ -451,4 +452,61 @@ describe('PrioritySampler', () => { expect(context._sampling.mechanism).to.equal(SAMPLING_MECHANISM_AGENT) }) }) + + describe('setPriority', () => { + it('should set sampling priority and default mechanism', () => { + prioritySampler.setPriority(span, USER_KEEP) + + expect(context._sampling.priority).to.equal(USER_KEEP) + expect(context._sampling.mechanism).to.equal(SAMPLING_MECHANISM_MANUAL) + }) + + it('should set sampling priority and mechanism', () => { + prioritySampler.setPriority(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + + expect(context._sampling.priority).to.equal(USER_KEEP) + expect(context._sampling.mechanism).to.equal(SAMPLING_MECHANISM_APPSEC) + }) + + it('should filter out invalid priorities', () => { + prioritySampler.setPriority(span, 42) + + expect(context._sampling.priority).to.be.undefined + expect(context._sampling.mechanism).to.be.undefined + }) + + it('should add decision maker tag if not set before', () => { + prioritySampler.setPriority(span, USER_KEEP, 
SAMPLING_MECHANISM_APPSEC) + + expect(context._trace.tags[DECISION_MAKER_KEY]).to.equal('-5') + }) + + it('should override previous priority but mantain previous decision maker tag', () => { + prioritySampler.sample(span) + + prioritySampler.setPriority(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + + expect(context._sampling.priority).to.equal(USER_KEEP) + expect(context._sampling.mechanism).to.equal(SAMPLING_MECHANISM_APPSEC) + expect(context._trace.tags[DECISION_MAKER_KEY]).to.equal('-0') + }) + }) + + describe('keepTrace', () => { + it('should not fail if no _prioritySampler', () => { + expect(() => { + PrioritySampler.keepTrace(span, SAMPLING_MECHANISM_APPSEC) + }).to.not.throw() + }) + + it('should call setPriority with span USER_KEEP and mechanism', () => { + const setPriority = sinon.stub(prioritySampler, 'setPriority') + + span._prioritySampler = prioritySampler + + PrioritySampler.keepTrace(span, SAMPLING_MECHANISM_APPSEC) + + expect(setPriority).to.be.calledOnceWithExactly(span, USER_KEEP, SAMPLING_MECHANISM_APPSEC) + }) + }) }) diff --git a/packages/dd-trace/test/profiling/exporters/agent.spec.js b/packages/dd-trace/test/profiling/exporters/agent.spec.js index b318456eebd..93ff52468f1 100644 --- a/packages/dd-trace/test/profiling/exporters/agent.spec.js +++ b/packages/dd-trace/test/profiling/exporters/agent.spec.js @@ -17,7 +17,6 @@ const WallProfiler = require('../../../src/profiling/profilers/wall') const SpaceProfiler = require('../../../src/profiling/profilers/space') const logger = require('../../../src/log') const { Profile } = require('pprof-format') -const semver = require('semver') const version = require('../../../../../package.json').version const RUNTIME_ID = 'a1b2c3d4-a1b2-a1b2-a1b2-a1b2c3d4e5f6' @@ -26,10 +25,6 @@ const HOST = 'test-host' const SERVICE = 'test-service' const APP_VERSION = '1.2.3' -if (!semver.satisfies(process.version, '>=10.12')) { - describe = describe.skip // eslint-disable-line no-global-assign -} - function wait (ms) { 
return new Promise((resolve, reject) => { setTimeout(resolve, ms) @@ -303,7 +298,7 @@ describe('exporters/agent', function () { /^Adding wall profile to agent export:( [0-9a-f]{2})+$/, /^Adding space profile to agent export:( [0-9a-f]{2})+$/, /^Submitting profiler agent report attempt #1 to:/i, - /^Error from the agent: HTTP Error 400$/, + /^Error from the agent: HTTP Error 500$/, /^Submitting profiler agent report attempt #2 to:/i, /^Agent export response: ([0-9a-f]{2}( |$))*/ ] @@ -344,7 +339,7 @@ describe('exporters/agent', function () { return } const data = Buffer.from(json) - res.writeHead(400, { + res.writeHead(500, { 'content-type': 'application/json', 'content-length': data.length }) @@ -356,6 +351,43 @@ describe('exporters/agent', function () { waitForResponse ]) }) + + it('should not retry on 4xx errors', async function () { + const exporter = newAgentExporter({ url, logger: { debug: () => {}, error: () => {} } }) + const start = new Date() + const end = new Date() + const tags = { foo: 'bar' } + + const [wall, space] = await Promise.all([ + createProfile(['wall', 'microseconds']), + createProfile(['space', 'bytes']) + ]) + + const profiles = { + wall, + space + } + + let tries = 0 + const json = JSON.stringify({ error: 'some error' }) + app.post('/profiling/v1/input', upload.any(), (_, res) => { + tries++ + const data = Buffer.from(json) + res.writeHead(400, { + 'content-type': 'application/json', + 'content-length': data.length + }) + res.end(data) + }) + + try { + await exporter.export({ profiles, start, end, tags }) + throw new Error('should have thrown') + } catch (err) { + expect(err.message).to.equal('HTTP Error 400') + } + expect(tries).to.equal(1) + }) }) describe('using ipv6', () => { diff --git a/packages/dd-trace/test/proxy.spec.js b/packages/dd-trace/test/proxy.spec.js index a21e2f4226a..3d7ebbc5a2a 100644 --- a/packages/dd-trace/test/proxy.spec.js +++ b/packages/dd-trace/test/proxy.spec.js @@ -131,7 +131,8 @@ describe('TracerProxy', () => { 
remoteConfig: { enabled: true }, - configure: sinon.spy() + configure: sinon.spy(), + llmobs: {} } Config = sinon.stub().returns(config) diff --git a/plugin-env b/plugin-env new file mode 100755 index 00000000000..78166b8ca72 --- /dev/null +++ b/plugin-env
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+#
+# Opens a sub-shell (dev environment) for a single dd-trace plugin: PLUGINS and
+# SERVICES are read from the plugin's job in .github/workflows/plugins.yml and
+# exported, the docker compose services are started when they are not already
+# running, and `yarn services` is run before handing over to $SHELL.
+args=("$@")
+plugin_name=${args[0]}
+
+YELLOW='\033[33m'
+RESET='\033[0m' # No Color
+
+# node is checked first because the usage text below already relies on it to
+# list the available plugins.
+if ! hash node 2>/dev/null; then
+  echo "Node.js is not installed. Please install Node.js before running this script."
+  echo "You can use nvm to install Node.js. See https://nvm.sh for more information."
+  echo "For best results, use the latest version of Node.js."
+  exit 1
+fi
+
+if [ -z "$plugin_name" ]; then
+  echo "Usage: ./plugin-env <plugin-name>"
+  echo "  <plugin-name> is the name of the dd-trace plugin to enter the dev environment for."
+  echo ""
+  echo "  It can be one of the following:"
+  node - << EOF
+    const fs=require('fs');
+    const yaml = require('yaml');
+    const pluginsData = fs.readFileSync('.github/workflows/plugins.yml', 'utf8');
+    const env=Object.keys(yaml.parse(pluginsData).jobs);
+    console.log(...env);
+EOF
+  exit 1
+fi
+
+if ! hash yarn 2>/dev/null; then
+  echo "yarn@1.x is not installed. Please install yarn@1.x before running this script."
+  echo "You can install yarn by running 'npm install -g yarn'."
+  exit 1
+fi
+
+# Look up the PLUGINS and SERVICES values of the job named after the plugin.
+read -r PLUGINS SERVICES <<<$(node - << EOF
+const fs=require('fs');
+const yaml = require('yaml');
+const pluginsData = fs.readFileSync('.github/workflows/plugins.yml', 'utf8');
+const { PLUGINS, SERVICES } = yaml.parse(pluginsData).jobs['$plugin_name'].env;
+console.log(PLUGINS || '', SERVICES || '')
+EOF
+)
+
+export PLUGINS
+export SERVICES
+
+if [ -z "$SERVICES" ]; then
+  echo "The plugin '$plugin_name' does not have any services defined. Nothing to do here."
+else
+  if ! hash docker 2>/dev/null; then
+    echo "Docker is not installed. Please install Docker before running this script."
+    echo "You can install Docker by following the instructions at https://docs.docker.com/get-docker/."
+    exit 1
+  fi
+  if (! docker stats --no-stream >/dev/null); then
+    echo "The docker daemon is not running. Please start Docker before running this script."
+    exit 1
+  fi
+  # Start the services only when none of their containers is running, and
+  # remember that this script started them so they are stopped again on exit.
+  service_ids=$(docker compose ps -q $SERVICES)
+  if [ -z "$service_ids" ] || ! docker ps -q --no-trunc | grep -qF "$service_ids"; then
+    teardown=1
+    docker compose up -d $SERVICES
+  fi
+fi
+
+yarn services
+
+echo -e $YELLOW
+echo -e "You are now in a sub-shell (i.e. a dev environment) for the dd-trace plugin '$plugin_name'."
+echo -e "The following environment variables set:${RESET}"
+echo -e "\tPLUGINS=$PLUGINS"
+echo -e "\tSERVICES=$SERVICES"
+echo -e "${YELLOW}The ${RESET}versions${YELLOW} directory has been populated, and any ${RESET}\$SERVICES${YELLOW} have been brought up if not already running."
+echo -e "You can now run the plugin's tests with:"
+echo -e "\t${RESET}yarn test:plugins"
+echo -e "${YELLOW}To exit this shell, type 'exit' or do Ctrl+D."
+echo -e $RESET
+
+$SHELL
+
+if [ -n "$teardown" ]; then
+  docker compose stop $SERVICES
+fi
+
+echo -e $YELLOW
+echo "Exited the sub-shell for the dd-trace plugin '$plugin_name'."
+if [ -n "$teardown" ]; then
+  echo "Also stopped any services that were started."
+fi
+echo "You're now back in the main shell."
+echo -e $RESET
diff --git a/requirements.json b/requirements.json new file mode 100644 index 00000000000..85fc7c33894 --- /dev/null +++ b/requirements.json @@ -0,0 +1,85 @@ +{ + "$schema": "https://raw.githubusercontent.com/DataDog/auto_inject/refs/heads/main/preload_go/cmd/library_requirements_tester/testdata/requirements_schema.json", + "version": 1, + "native_deps": { + "glibc": [{ + "arch": "arm", + "supported": true, + "description": "From ubuntu xenial (16.04)", + "min": "2.23" + },{ + "arch": "arm64", + "supported": true, + "description": "From centOS 7", + "min": "2.17" + },{ + "arch": "x64", + "supported": true, + "description": "From centOS 7", + "min": "2.17" + },{ + "arch": "x86", + "supported": true, + "description": "From debian jessie (8)", + "min": "2.19" + }], + "musl": [{ + "arch": "arm", + "supported": true, + "description": "From alpine 3.13" + },{ + "arch": "arm64", + "supported": true, + "description": "From alpine 3.13" + },{ + "arch": "x64", + "supported": true, + "description": "From alpine 3.13" + },{ + "arch": "x86", + "supported": true, + "description": "From alpine 3.13" + }] + }, + "deny": [ + { + "id": "npm", + "description": "Ignore the npm CLI", + "os": null, + "cmds": [ + "**/node", + "**/nodejs", + "**/ts-node", + "**/ts-node-*" + ], + "args": [{ "args": ["*/npm-cli.js"], "position": 1}], + "envars": null + }, + { + "id": "yarn", + "description": "Ignore the yarn CLI", + "os": null, + "cmds": [ + "**/node", + "**/nodejs", + "**/ts-node", + "**/ts-node-*" + ], + "args": [{ "args": ["*/yarn.js"], "position": 1}], + "envars": null + }, + { + "id": "pnpm", + "description": "Ignore the pnpm CLI", + "os": null, + "cmds": [ + "**/node", + "**/nodejs", + "**/ts-node", + "**/ts-node-*" + ], + "args": [{ "args": ["*/pnpm.cjs"], "position": 1}], + "envars": null + } + ] +} diff --git a/scripts/release/proposal.js b/scripts/release/proposal.js new file mode 100644 index 00000000000..b5c16de4c0e --- /dev/null +++ b/scripts/release/proposal.js
@@ -0,0 +1,149 @@
+'use strict'
+
+/* eslint-disable no-console */
+
+// Prepares a release proposal branch for a given release line:
+//   1. Updates the local `v<line>.x` release branch.
+//   2. Uses `branch-diff` to list what master has that the branch lacks.
+//   3. Picks the next version: a minor bump when that output contains
+//      `SEMVER-MINOR`, otherwise a patch bump.
+//   4. Cherry-picks the missing commits onto `v<version>-proposal`.
+//   5. Bumps package.json, commits it, and writes the release notes.
+
+// TODO: Support major versions.
+
+const { execSync } = require('child_process')
+const fs = require('fs')
+const path = require('path')
+
+// Helpers for colored output.
+const log = msg => console.log(msg)
+const success = msg => console.log(`\x1b[32m${msg}\x1b[0m`)
+const error = msg => console.log(`\x1b[31m${msg}\x1b[0m`)
+const whisper = msg => console.log(`\x1b[90m${msg}\x1b[0m`)
+
+// Branch the script was started from. The command output ends with a newline,
+// which has to be stripped so the anchored pattern in `isActivePatch` can match.
+const currentBranch = capture('git branch --show-current').trim()
+const releaseLine = process.argv[2]
+
+// Validate release line argument.
+// NOTE(review): only process.argv[2] (the release line) is read in this
+// script; the `[release-type]` shown in the usage text is not consumed here.
+if (!releaseLine || releaseLine === 'help' || releaseLine === '--help') {
+  log('Usage: node scripts/release/proposal <release-line> [release-type]')
+  process.exit(0)
+} else if (!releaseLine?.match(/^\d+$/)) {
+  error('Invalid release line. Must be a whole number.')
+  process.exit(1)
+}
+
+// Make sure the release branch is up to date to prepare for new proposal.
+// The main branch is not automatically pulled to avoid inconsistencies between
+// release lines if new commits are added to it during a release.
+run(`git checkout v${releaseLine}.x`)
+run('git pull')
+
+// Version found in package.json after the checkout above. It is read before
+// `diffCmd` is built because `isActivePatch()` is called while building it and
+// compares against these bindings, which cannot be used before this line runs.
+const [lastMajor, lastMinor, lastPatch] = require('../../package.json').version.split('.').map(Number)
+
+// Base `branch-diff` command. While a patch proposal is active, semver-minor
+// changes are excluded as well so the proposal stays a patch release.
+const diffCmd = [
+  'branch-diff',
+  '--user DataDog',
+  '--repo dd-trace-js',
+  isActivePatch()
+    ? `--exclude-label=semver-major,semver-minor,dont-land-on-v${releaseLine}.x`
+    : `--exclude-label=semver-major,dont-land-on-v${releaseLine}.x`
+].join(' ')
+
+// Determine the new version.
+const lineDiff = capture(`${diffCmd} v${releaseLine}.x master`)
+const newVersion = lineDiff.includes('SEMVER-MINOR')
+  ? `${releaseLine}.${lastMinor + 1}.0`
+  : `${releaseLine}.${lastMinor}.${lastPatch + 1}`
+
+// Checkout new branch and output new changes.
+run(`git checkout v${newVersion}-proposal || git checkout -b v${newVersion}-proposal`)
+
+// Get the hashes of the last version and the commits to add.
+const lastCommit = capture('git log -1 --pretty=%B').trim()
+const proposalDiff = capture(`${diffCmd} --format=sha --reverse v${newVersion}-proposal master`)
+  .replace(/\n/g, ' ').trim()
+
+if (proposalDiff) {
+  // We have new commits to add, so revert the version commit if it exists.
+  if (lastCommit === `v${newVersion}`) {
+    run('git reset --hard HEAD~1')
+  }
+
+  // Output new changes since last commit of the proposal branch.
+  run(`${diffCmd} v${newVersion}-proposal master`)
+
+  // Cherry pick all new commits to the proposal branch.
+  try {
+    run(`echo "${proposalDiff}" | xargs git cherry-pick`)
+  } catch (err) {
+    error('Cherry-pick failed. Resolve the conflicts and run `git cherry-pick --continue` to continue.')
+    error('When all conflicts have been resolved, run this script again.')
+    process.exit(1)
+  }
+}
+
+// Update package.json with new version.
+run(`npm version --git-tag-version=false ${newVersion}`)
+run(`git commit -uno -m v${newVersion} package.json || exit 0`)
+
+ready()
+
+// Check if current branch is already an active patch proposal branch to avoid
+// creating a new minor proposal branch if new minor commits are added to the
+// main branch during an existing patch release.
+//
+// Proposal branches are created above as `v<version>-proposal`, so the leading
+// `v` is accepted (and optional, for branches named without it).
+function isActivePatch () {
+  const currentMatch = currentBranch.match(/^v?(\d+)\.(\d+)\.(\d+)-proposal$/)
+
+  if (currentMatch) {
+    const [major, minor, patch] = currentMatch.slice(1).map(Number)
+
+    if (major === lastMajor && minor === lastMinor && patch > lastPatch) {
+      return true
+    }
+  }
+
+  return false
+}
+
+// Output a command to the terminal and execute it.
+function run (cmd) {
+  whisper(`> ${cmd}`)
+
+  const output = execSync(cmd, {}).toString()
+
+  log(output)
+}
+
+// Run a command and capture its output to return it to the caller.
+function capture (cmd) {
+  return execSync(cmd, {}).toString()
+}
+
+// Write release notes to a file that can be copied to the GitHub release.
+function ready () { + const notesDir = path.join(__dirname, '..', '..', '.github', 'release_notes') + const notesFile = path.join(notesDir, `${newVersion}.md`) + const lineDiff = capture(`${diffCmd} --markdown=true v${releaseLine}.x master`) + + fs.mkdirSync(notesDir, { recursive: true }) + fs.writeFileSync(notesFile, lineDiff) + + success('Release proposal is ready.') + success(`Changelog at .github/release_notes/${newVersion}.md`) + + process.exit(0) +} diff --git a/version.js b/version.js index 63fc5e5ce9e..6bd714a14e9 100644 --- a/version.js +++ b/version.js @@ -1,7 +1,9 @@ 'use strict' -const ddMatches = require('./package.json').version.match(/^(\d+)\.(\d+)\.(\d+)/) -const nodeMatches = process.versions.node.match(/^(\d+)\.(\d+)\.(\d+)/) +/* eslint-disable no-var */ + +var ddMatches = require('./package.json').version.match(/^(\d+)\.(\d+)\.(\d+)/) +var nodeMatches = process.versions.node.match(/^(\d+)\.(\d+)\.(\d+)/) module.exports = { DD_MAJOR: parseInt(ddMatches[1]), diff --git a/yarn.lock b/yarn.lock index a839f3a3c84..77dacb70614 100644 --- a/yarn.lock +++ b/yarn.lock @@ -416,25 +416,25 @@ lru-cache "^7.14.0" node-gyp-build "^4.5.0" -"@datadog/native-iast-taint-tracking@3.1.0": - version "3.1.0" - resolved "https://registry.yarnpkg.com/@datadog/native-iast-taint-tracking/-/native-iast-taint-tracking-3.1.0.tgz#7b2ed7f8fad212d65e5ab03bcdea8b42a3051b2e" - integrity sha512-rw6qSjmxmu1yFHVvZLXFt/rVq2tUZXocNogPLB8n7MPpA0jijNGb109WokWw5ITImiW91GcGDuBW6elJDVKouQ== +"@datadog/native-iast-taint-tracking@3.2.0": + version "3.2.0" + resolved "https://registry.yarnpkg.com/@datadog/native-iast-taint-tracking/-/native-iast-taint-tracking-3.2.0.tgz#9fb6823d82f934e12c06ea1baa7399ca80deb2ec" + integrity sha512-Mc6FzCoyvU5yXLMsMS9yKnEqJMWoImAukJXolNWCTm+JQYCMf2yMsJ8pBAm7KyZKliamM9rCn7h7Tr2H3lXwjA== dependencies: node-gyp-build "^3.9.0" -"@datadog/native-metrics@^2.0.0": - version "2.0.0" - resolved 
"https://registry.npmjs.org/@datadog/native-metrics/-/native-metrics-2.0.0.tgz" - integrity sha512-YklGVwUtmKGYqFf1MNZuOHvTYdKuR4+Af1XkWcMD8BwOAjxmd9Z+97328rCOY8TFUJzlGUPaXzB8j2qgG/BMwA== +"@datadog/native-metrics@^3.0.1": + version "3.0.1" + resolved "https://registry.yarnpkg.com/@datadog/native-metrics/-/native-metrics-3.0.1.tgz#dc276c93785c0377a048e316f23b7c8ff3acfa84" + integrity sha512-0GuMyYyXf+Qpb/F+Fcekz58f2mO37lit9U3jMbWY/m8kac44gCPABzL5q3gWbdH+hWgqYfQoEYsdNDGSrKfwoQ== dependencies: node-addon-api "^6.1.0" node-gyp-build "^3.9.0" -"@datadog/pprof@5.3.0": - version "5.3.0" - resolved "https://registry.yarnpkg.com/@datadog/pprof/-/pprof-5.3.0.tgz#c2f58d328ecced7f99887f1a559d7fe3aecb9219" - integrity sha512-53z2Q3K92T6Pf4vz4Ezh8kfkVEvLzbnVqacZGgcbkP//q0joFzO8q00Etw1S6NdnCX0XmX08ULaF4rUI5r14mw== +"@datadog/pprof@5.4.1": + version "5.4.1" + resolved "https://registry.yarnpkg.com/@datadog/pprof/-/pprof-5.4.1.tgz#08c9bcf5d8efb2eeafdfc9f5bb5402f79fb41266" + integrity sha512-IvpL96e/cuh8ugP5O8Czdup7XQOLHeIDgM5pac5W7Lc1YzGe5zTtebKFpitvb1CPw1YY+1qFx0pWGgKP2kOfHg== dependencies: delay "^5.0.0" node-gyp-build "<4.0"