diff --git a/packages/kbn-eslint-plugin-imports/README.mdx b/packages/kbn-eslint-plugin-imports/README.mdx index 1d6971d30e97f2..b8eb60e98c3ed8 100644 --- a/packages/kbn-eslint-plugin-imports/README.mdx +++ b/packages/kbn-eslint-plugin-imports/README.mdx @@ -39,6 +39,10 @@ This rule validates that every import request in the repsitory follows a standar This rule is not configurable, should never be skipped, and is auto-fixable. +## `@kbn/imports/require_import` + +This rule validates that specific imports are present in a file. This allows you to e.g. require Mocha globals in test files automatically. Currently the only supported import type is a TypeScript reference type. This rule is auto-fixable. + ## `@kbn/imports/exports_moved_packages` This rule assists package authors who are doing the good work of breaking up large packages. The goal is to define exports which used to be part of one package as having moved to another package. The configuration maintains this mapping and is designed to be extended in the future is additional needs arrise like targetting specific package types. diff --git a/packages/kbn-eslint-plugin-imports/index.ts b/packages/kbn-eslint-plugin-imports/index.ts index 2db05a0d59baa3..5ff083ce4a1e00 100644 --- a/packages/kbn-eslint-plugin-imports/index.ts +++ b/packages/kbn-eslint-plugin-imports/index.ts @@ -12,6 +12,7 @@ import { UniformImportsRule } from './src/rules/uniform_imports'; import { ExportsMovedPackagesRule } from './src/rules/exports_moved_packages'; import { NoUnusedImportsRule } from './src/rules/no_unused_imports'; import { NoBoundaryCrossingRule } from './src/rules/no_boundary_crossing'; +import { RequireImportRule } from './src/rules/require_import'; /** * Custom ESLint rules, add `'@kbn/eslint-plugin-imports'` to your eslint config to use them @@ -23,4 +24,5 @@ export const rules = { exports_moved_packages: ExportsMovedPackagesRule, no_unused_imports: NoUnusedImportsRule, no_boundary_crossing: NoBoundaryCrossingRule, + require_import: RequireImportRule, }; diff --git a/packages/kbn-eslint-plugin-imports/src/rules/require_import.test.ts b/packages/kbn-eslint-plugin-imports/src/rules/require_import.test.ts new file mode 100644 index 00000000000000..ad2ca020f3d283 --- /dev/null +++ b/packages/kbn-eslint-plugin-imports/src/rules/require_import.test.ts @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { RuleTester } from 'eslint'; +import { RequireImportRule } from './require_import'; +import dedent from 'dedent'; + +const fmt = (str: TemplateStringsArray) => dedent(str) + '\n'; + +const tsTester = [ + '@typescript-eslint/parser', + new RuleTester({ + parser: require.resolve('@typescript-eslint/parser'), + parserOptions: { + sourceType: 'module', + ecmaVersion: 2018, + ecmaFeatures: { + jsx: true, + }, + }, + }), +] as const; + +const babelTester = [ + '@babel/eslint-parser', + new RuleTester({ + parser: require.resolve('@babel/eslint-parser'), + parserOptions: { + sourceType: 'module', + ecmaVersion: 2018, + requireConfigFile: false, + babelOptions: { + presets: ['@kbn/babel-preset/node_preset'], + }, + }, + }), +] as const; + +for (const [name, tester] of [tsTester, babelTester]) { + describe(name, () => { + tester.run('@kbn/imports/require_import', RequireImportRule, { + valid: [ + { + options: ['mocha'], + filename: 'foo.ts', + code: fmt` + import 'mocha'; + + /// + + describe(( ) => { + before(( ) => { + }); + }); + `, + }, + ], + invalid: [ + { + options: ['mocha'], + filename: 'foo.ts', + code: fmt` + describe(( ) => { + before(( ) => { + }); + }); + `, + output: fmt`/// + + describe(( ) => { + before(( ) => { + }); + });`, + errors: [ + { + line: 1, + message: `Required module 'mocha' is not imported as a type reference`, + }, + ], + }, + ], + }); + }); +} diff --git a/packages/kbn-eslint-plugin-imports/src/rules/require_import.ts b/packages/kbn-eslint-plugin-imports/src/rules/require_import.ts new file mode 100644 index 00000000000000..f766735ab7e669 --- /dev/null +++ b/packages/kbn-eslint-plugin-imports/src/rules/require_import.ts @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import type { Rule } from 'eslint'; +import { load } from 'cheerio'; + +type StringModuleConfig = string; + +interface ObjectModuleConfig { + module: string; + as: ReferenceModuleAs; +} + +type ModuleConfig = StringModuleConfig | ObjectModuleConfig; + +enum ReferenceModuleAs { + typeReference = 'typeReference', +} + +export const RequireImportRule: Rule.RuleModule = { + meta: { + type: 'problem', + fixable: 'code', + docs: { + url: 'https://github.com/elastic/kibana/blob/main/packages/kbn-eslint-plugin-imports/README.mdx#kbnimportsrequire_import', + }, + schema: { + type: 'array', + items: { + oneOf: [ + { + type: 'string', + }, + { + type: 'object', + additionalProperties: false, + additionalItems: false, + properties: { + module: { + type: 'string', + }, + as: { + type: 'string', + }, + }, + required: ['module', 'type'], + }, + ], + }, + }, + }, + + create(context) { + const requiredImports: ModuleConfig[] = context.options; + + const mappedOptions: ObjectModuleConfig[] = requiredImports.map((config) => { + if (typeof config === 'string') { + return { + module: config, + as: ReferenceModuleAs.typeReference, + }; + } + return config; + }); + + return { + 'Program:exit': (node) => { + mappedOptions.forEach((option) => { + switch (option.as) { + case ReferenceModuleAs.typeReference: + const hasImport = node.comments?.some((comment) => { + const nodeText = comment.value.match(/\/\s*(<.*>)/)?.[1]; + if (nodeText) { + const parsedNode = load(nodeText, { xml: true })()._root?.children()[0]; + return ( + parsedNode && + parsedNode.name === 'reference' && + parsedNode.attribs.types === option.module + ); + } + }); + + if (!hasImport) { + context.report({ + node, + message: `Required module '${option.module}' is not imported as a type reference`, + fix(fixer) { + return fixer.insertTextBefore( + node.body[0], + `/// \n\n` + ); + }, + }); + } + } + }); + }, + }; + }, +}; diff --git a/x-pack/plugins/observability_ai_assistant/jest.config.js b/x-pack/plugins/observability_ai_assistant/jest.config.js index ff54dbc08c2b0b..61cc1b353b1cf1 100644 --- a/x-pack/plugins/observability_ai_assistant/jest.config.js +++ b/x-pack/plugins/observability_ai_assistant/jest.config.js @@ -8,11 +8,16 @@ module.exports = { preset: '@kbn/test', rootDir: '../../..', - roots: ['/x-pack/plugins/observability_ai_assistant'], + roots: [ + '/x-pack/plugins/observability_ai_assistant/public', + '/x-pack/plugins/observability_ai_assistant/common', + '/x-pack/plugins/observability_ai_assistant/server', + ], setupFiles: ['/x-pack/plugins/observability_ai_assistant/.storybook/jest_setup.js'], collectCoverage: true, collectCoverageFrom: [ '/x-pack/plugins/observability_ai_assistant/{common,public,server}/**/*.{js,ts,tsx}', ], + coverageReporters: ['html'], }; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/.eslintrc.json b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/.eslintrc.json new file mode 100644 index 00000000000000..c803815e57692a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/.eslintrc.json @@ -0,0 +1,21 @@ +{ + "overrides": [ + { + "files": [ + "**/*.spec.ts" + ], + "rules": { + "@kbn/imports/require_import": [ + "error", + "@kbn/ambient-ftr-types" + ], + "@typescript-eslint/triple-slash-reference": [ + "off" + ], + "spaced-comment": [ + "off" + ] + } + } + ] +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts index 5c51653036645c..c39087249285c9 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/evaluation.ts @@ -5,22 +5,25 @@ * 2.0. */ -import yargs from 'yargs'; -import { run } from '@kbn/dev-cli-runner'; import { Client } from '@elastic/elasticsearch'; -import inquirer from 'inquirer'; +import { run } from '@kbn/dev-cli-runner'; import * as fastGlob from 'fast-glob'; -import Path from 'path'; +import inquirer from 'inquirer'; +import yargs from 'yargs'; import chalk from 'chalk'; +import { castArray, omit } from 'lodash'; +// @ts-expect-error +import Mocha from 'mocha'; +import Path from 'path'; import * as table from 'table'; -import { castArray, omit, sortBy } from 'lodash'; import { TableUserConfig } from 'table'; import { format, parse } from 'url'; +import { MessageRole } from '../../common'; import { options } from './cli'; import { getServiceUrls } from './get_service_urls'; import { KibanaClient } from './kibana_client'; -import { EvaluationFunction } from './types'; -import { MessageRole } from '../../common'; +import { initServices } from './services'; +import { setupSynthtrace } from './setup_synthtrace'; function runEvaluations() { yargs(process.argv.slice(2)) @@ -69,43 +72,24 @@ function runEvaluations() { const scenarios = (argv.files !== undefined && castArray(argv.files).map((file) => Path.join(process.cwd(), file))) || - fastGlob.sync(Path.join(__dirname, './scenarios/**/*.ts')); + fastGlob.sync(Path.join(__dirname, './scenarios/**/*.spec.ts')); if (!scenarios.length) { throw new Error('No scenarios to run'); } - if (argv.clear) { - log.info('Clearing conversations'); - await esClient.deleteByQuery({ - index: '.kibana-observability-ai-assistant-conversations', - query: { - ...(argv.spaceId ? { term: { namespace: argv.spaceId } } : { match_all: {} }), - }, - refresh: true, - }); - } + log.info('Setting up Synthtrace clients'); - let evaluationFunctions: Array<{ - name: string; - fileName: string; - fn: EvaluationFunction; - }> = []; - - for (const fileName of scenarios) { - log.info(`Running scenario ${fileName}`); - const mod = await import(fileName); - Object.keys(mod).forEach((key) => { - evaluationFunctions.push({ name: key, fileName, fn: mod[key] }); - }); - } + const synthtraceEsClients = await setupSynthtrace({ + target: serviceUrls.kibanaUrl, + client: esClient, + log, + }); - if (argv.grep) { - const lc = argv.grep.toLowerCase(); - evaluationFunctions = evaluationFunctions.filter((fn) => - fn.name.toLowerCase().includes(lc) - ); - } + const chatClient = kibanaClient.createChatClient({ + connectorId: connector.id!, + persist: argv.persist, + }); const header: string[][] = [ [chalk.bold('Criterion'), chalk.bold('Result'), chalk.bold('Reasoning')], @@ -144,19 +128,7 @@ function runEvaluations() { ], }; - const sortedEvaluationFunctions = sortBy(evaluationFunctions, 'fileName', 'name'); - - for (const { name, fn } of sortedEvaluationFunctions) { - log.debug(`Executing ${name}`); - const result = await fn({ - esClient, - kibanaClient, - chatClient: kibanaClient.createChatClient({ - connectorId: connector.id!, - persist: argv.persist, - title: argv.autoTitle ? undefined : name, - }), - }); + chatClient.onResult((result) => { log.debug(`Result:`, JSON.stringify(result)); const output: string[][] = [ [ @@ -184,7 +156,46 @@ function runEvaluations() { ]); }); log.write(table.table(output, tableConfig)); + }); + + initServices({ + kibanaClient, + esClient, + chatClient, + synthtraceEsClients, + }); + + const mocha = new Mocha({ + grep: argv.grep, + timeout: '5m', + }); + + mocha.suite.beforeAll(async () => { + if (argv.clear) { + log.info('Clearing conversations'); + await esClient.deleteByQuery({ + index: '.kibana-observability-ai-assistant-conversations', + query: { + ...(argv.spaceId ? { term: { namespace: argv.spaceId } } : { match_all: {} }), + }, + refresh: true, + }); + } + }); + + for (const filename of scenarios) { + mocha.addFile(filename); } + + return new Promise((resolve, reject) => { + mocha.run((failures: any) => { + if (failures) { + reject(new Error(`Some tests failed`)); + return; + } + resolve(); + }); + }); }, { log: { diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts index d554e6c70a2fc0..ce909c7a973943 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/get_service_urls.ts @@ -140,7 +140,7 @@ export async function getServiceUrls({ const kibanaUrlWithAuth = format({ ...parsedKibanaUrl, - auth, + auth: parsedKibanaUrl.auth || auth, }); const validatedKibanaUrl = await getKibanaUrl({ kibana: kibanaUrlWithAuth, log }); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts index 7c447931f30dee..585dec254db249 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/kibana_client.ts @@ -6,7 +6,7 @@ */ import axios, { AxiosInstance, AxiosResponse } from 'axios'; -import { pick } from 'lodash'; +import { pick, remove } from 'lodash'; import { filter, lastValueFrom, map, tap, toArray } from 'rxjs'; import { format, parse, UrlObject } from 'url'; import { Message, MessageRole } from '../../common'; @@ -29,7 +29,7 @@ import { EvaluationResult } from './types'; type InnerMessage = Message['message']; type StringOrMessageList = string | InnerMessage[]; -interface ChatClient { +export interface ChatClient { chat: (message: StringOrMessageList) => Promise; complete: ( ...args: [StringOrMessageList] | [string, InnerMessage[]] @@ -39,6 +39,8 @@ interface ChatClient { {}: { conversationId?: string; messages: InnerMessage[] }, criteria: string[] ) => Promise; + getResults: () => EvaluationResult[]; + onResult: (cb: (result: EvaluationResult) => void) => () => void; } export class KibanaClient { @@ -72,11 +74,9 @@ export class KibanaClient { createChatClient({ connectorId, persist, - title, }: { connectorId: string; persist: boolean; - title?: string; }): ChatClient { function getMessages(message: string | Array): Array { if (typeof message === 'string') { @@ -103,6 +103,11 @@ export class KibanaClient { return { functionDefinitions, contextDefinitions }; } + const onResultCallbacks: Array<{ + callback: (result: EvaluationResult) => void; + unregister: () => void; + }> = []; + async function chat({ messages, functions, @@ -137,6 +142,8 @@ export class KibanaClient { return receivedMessage.message; } + const results: EvaluationResult[] = []; + return { chat: async (message) => { const { functionDefinitions, contextDefinitions } = await getFunctions(); @@ -172,7 +179,6 @@ export class KibanaClient { messages, connectorId, persist, - title, }, { responseType: 'stream' } ) @@ -287,14 +293,17 @@ export class KibanaClient { functionCall: 'scores', }); - return { + const scoredCriteria = ( + JSON.parse(message.function_call.arguments) as { + criteria: Array<{ index: number; score: number; reasoning: string }>; + } + ).criteria; + + const result: EvaluationResult = { conversationId, messages, - scores: ( - JSON.parse(message.function_call.arguments) as { - criteria: Array<{ index: number; score: number; reasoning: string }>; - } - ).criteria.map(({ index, score, reasoning }) => { + passed: scoredCriteria.every(({ score }) => score >= 1), + scores: scoredCriteria.map(({ index, score, reasoning }) => { return { criterion: criteria[index], score, @@ -302,6 +311,22 @@ export class KibanaClient { }; }), }; + + results.push(result); + + onResultCallbacks.forEach(({ callback }) => { + callback(result); + }); + + return result; + }, + getResults: () => results, + onResult: (callback) => { + const unregister = () => { + remove(onResultCallbacks, { callback }); + }; + onResultCallbacks.push({ callback, unregister }); + return unregister; }, }; } diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.spec.ts new file mode 100644 index 00000000000000..6f7ff7e333215a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.spec.ts @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/// + +import expect from '@kbn/expect'; +import { chatClient } from '../../services'; + +describe('health', () => { + it('returns the cluster health state', async () => { + const conversation = await chatClient.complete( + 'Can you tell me what the state of my Elasticsearch cluster is?' + ); + + const result = await chatClient.evaluate(conversation, [ + 'Calls the Elasticsearch function with method: GET and path: _cluster/health', + 'Describes the cluster status based on the response from the Elasticsearch function', + ]); + + expect(result.passed).to.be(true); + }); +}); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts deleted file mode 100644 index 1f73f9a9a625d5..00000000000000 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/elasticsearch/index.ts +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { EvaluationFunction } from '../../types'; - -export const health: EvaluationFunction = async ({ chatClient }) => { - const conversation = await chatClient.complete( - 'Can you tell me what the state of my Elasticsearch cluster is?' - ); - - const evaluation = await chatClient.evaluate(conversation, [ - 'Calls the Elasticsearch function with method: GET and path: _cluster/health', - 'Describes the cluster status based on the response from the Elasticsearch function', - ]); - - return evaluation; -}; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.spec.ts new file mode 100644 index 00000000000000..8acc2c98aa504d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.spec.ts @@ -0,0 +1,298 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/// + +import { last } from 'lodash'; +import moment from 'moment'; +import { apm, timerange } from '@kbn/apm-synthtrace-client'; +import expect from '@kbn/expect'; +import { MessageRole } from '../../../../common'; +import { chatClient, esClient, synthtraceEsClients } from '../../services'; + +function extractEsqlQuery(response: string) { + return response.match(/```esql([\s\S]*?)```/)?.[1]; +} + +async function evaluateEsqlQuery({ + question, + expected, + criteria = [], + execute = true, +}: { + question: string; + expected?: string; + criteria?: string[]; + execute?: boolean; +}): Promise { + let conversation = await chatClient.complete(question); + + const esqlQuery = extractEsqlQuery(last(conversation.messages)?.content || ''); + + if (esqlQuery && execute) { + conversation = await chatClient.complete( + conversation.conversationId!, + conversation.messages.concat({ + content: '', + role: MessageRole.Assistant, + function_call: { + name: 'execute_query', + arguments: JSON.stringify({ + query: esqlQuery, + }), + trigger: MessageRole.User, + }, + }) + ); + } + + const evaluation = await chatClient.evaluate(conversation, [ + ...(expected + ? [ + `Returns a ES|QL query that is functionally equivalent to: + ${expected}`, + ] + : []), + ...(execute && expected ? [`The query successfully executed without an error`] : []), + ...criteria, + ]); + + expect(evaluation.passed).to.be(true); + + return; +} + +describe('ES|QL query generation', () => { + describe('other queries', () => { + describe('with packetbeat data', () => { + before(async () => { + await esClient.indices.create({ + index: 'packetbeat-8.11.3', + mappings: { + properties: { + '@timestamp': { + type: 'date', + }, + destination: { + type: 'object', + properties: { + domain: { + type: 'keyword', + }, + }, + }, + url: { + type: 'object', + properties: { + domain: { + type: 'keyword', + }, + }, + }, + }, + }, + }); + }); + + it('top 10 unique domains', async () => { + await evaluateEsqlQuery({ + question: + 'For standard Elastic ECS compliant packetbeat data view, create an ES|QL query that shows the top 10 unique domains by doc count', + expected: `FROM packetbeat-* + | STATS doc_count = COUNT(destination.domain) BY destination.domain + | SORT doc_count DESC + | LIMIT 10`, + }); + }); + + after(async () => { + await esClient.indices.delete({ + index: 'packetbeat-8.11.3', + allow_no_indices: true, + }); + }); + }); + + describe('with employees data', () => { + before(async () => { + await esClient.indices.create({ + index: 'employees', + mappings: { + properties: { + hire_date: { + type: 'date', + }, + emp_no: { + type: 'integer', + }, + salary: { + type: 'integer', + }, + }, + }, + }); + }); + + it('five earliest employees', async () => { + await evaluateEsqlQuery({ + question: + 'From employees, I want to see the 5 earliest employees (hire_date), I want to display only the month and the year that they were hired in and their employee number (emp_no). Format the date as e.g. "September 2019".', + expected: `FROM employees + | EVAL hire_date_formatted = DATE_FORMAT(hire_date, ""MMMM yyyy"") + | SORT hire_date + | KEEP emp_no, hire_date_formatted + | LIMIT 5`, + execute: false, + }); + }); + + it('employees with pagination', async () => { + await evaluateEsqlQuery({ + question: + 'From employees, I want to sort the documents by salary, and then return 10 results per page, and then see the second page', + criteria: [ + 'The assistant should mention that pagination is currently not supported in ES|QL', + ], + }); + }); + + after(async () => { + await esClient.indices.delete({ + index: 'employees', + }); + }); + }); + + it('logs avg cpu', async () => { + await evaluateEsqlQuery({ + question: + 'My logs data (ECS) is in `logs-*`. Show me a query that gets the average CPU per host, limit it to the top 10 results, in 1m buckets, and only include the last 15m. ', + expected: `FROM logs-* + | WHERE @timestamp >= NOW() - 15 minutes + | EVAL bucket = DATE_TRUNC(1 minute, @timestamp) + | STATS avg_cpu = AVG(system.cpu.total.norm.pct) BY bucket, host.name + | LIMIT 10`, + }); + }); + + it('metricbeat avg cpu', async () => { + await evaluateEsqlQuery({ + question: `from \`metricbeat*\`, using ES|QL, I want to see the percentage of CPU time normalized by the number of CPU cores, broken down by hostname. the fields are system.cpu.user.pct, system.cpu.system.pct, and system.cpu.cores`, + expected: `FROM metricbeat* + | EVAL cpu_pct_normalized = (system.cpu.user.pct + system.cpu.system.pct) / system.cpu.cores + | STATS AVG(cpu_pct_normalized) BY host.name`, + }); + }); + + it('postgres avg duration', async () => { + await evaluateEsqlQuery({ + question: + 'extract the query duration from postgres log messages in postgres-logs*, using ECS fields, and calculate the avg', + expected: `FROM postgres-logs + | DISSECT message "%{} duration: %{query_duration} ms" + | EVAL query_duration_num = TO_DOUBLE(query_duration) + | STATS avg_duration = AVG(query_duration_num)`, + }); + }); + + it('high cardinality logs', async () => { + await evaluateEsqlQuery({ + question: `i have logs in high-cardinality-data-fake_stack.admin-console-* . errors are found when log.level contais the value ERROR. generate a query to obtain the error rate as a percetage of the total logs per day for the last 7 days`, + expected: `FROM high-cardinality-data-fake_stack.admin-console-* + | WHERE @timestamp >= NOW() - 7 days + | EVAL error = CASE(log.level == "ERROR", 1, 0), total = 1 + | EVAL bucket = DATE_TRUNC(1 day, @timestamp) + | STATS total_errors = SUM(error), total_logs = SUM(total) BY bucket + | EVAL error_rate = (total_errors / total_logs) * 100`, + }); + }); + + it('nyc taxis dropoff time', async () => { + await evaluateEsqlQuery({ + question: + 'From `nyc_taxis`, give me a query that shows the top 10 results where the drop off time was between 6am and 10am.', + expected: `FROM nyc_taxis + | WHERE DATE_EXTRACT(drop_off_time, "hour") >= 6 AND DATE_EXTRACT(drop_off_time, "hour") < 10 + | LIMIT 10`, + }); + }); + }); + + describe('APM queries', () => { + before(async () => { + const myServiceInstance = apm + .service('my-service', 'production', 'go') + .instance('my-instance'); + + await synthtraceEsClients.apmSynthtraceEsClient.index( + timerange(moment().subtract(15, 'minutes'), moment()) + .interval('1m') + .rate(10) + .generator((timestamp) => + myServiceInstance + .transaction('GET /api') + .timestamp(timestamp) + .duration(50) + .outcome('success') + ) + ); + }); + + it('metrics avg duration', async () => { + await evaluateEsqlQuery({ + question: + 'I want to see a query for metrics-apm*, filtering on metricset.name:transaction and metricset.interval:1m, showing the average duration (via transaction.duration.histogram), in 50 buckets.', + expected: `FROM metrics-apm* + | WHERE metricset.name == "transaction" AND metricset.interval == "1m" + | EVAL bucket = AUTO_BUCKET(@timestamp, 50, , ) + | STATS avg_duration = AVG(transaction.duration.histogram) BY bucket`, + }); + }); + + it('service inventory', async () => { + await evaluateEsqlQuery({ + question: + 'I want to show a list of services with APM data. My data is in `traces-apm*`. I want to show the average transaction duration, the success rate (by dividing event.outcome:failure by event.outcome:failure+success), and total amount of requests. As a time range, select the last 24 hours. Use ES|QL.', + expected: `FROM traces-apm* + | WHERE @timestamp >= NOW() - 24 hours + | EVAL successful = CASE(event.outcome == "success", 1, 0), + failed = CASE(event.outcome == "failure", 1, 0) + | STATS success_rate = AVG(successful), + avg_duration = AVG(transaction.duration), + total_requests = COUNT(transaction.id) BY service.name`, + }); + }); + + it('exit span', async () => { + await evaluateEsqlQuery({ + question: `I've got APM data in \`metrics-apm\`. Filter on \`metricset.name:service_destination\` and the last 24 hours. Break down by span.destination.service.resource. Each document contains the count of total events (span.destination.service.response_time.count) for that document's interval and the total amount of latency (span.destination.service.response_time.sum.us). A document either contains an aggregate of failed events (event.outcome:success) or failed events (event.outcome:failure). A single document might represent multiple failures or successes, depending on the value of span.destination.service.response_time.count. For each value of span.destination.service.resource, give me the average throughput, latency per request, and failure rate, as a value between 0 and 1. Just show me the query.`, + expected: `FROM metrics-apm + | WHERE metricset.name == "service_destination" AND @timestamp >= NOW() - 24 hours + | EVAL total_response_time = span.destination.service.response_time.sum.us / span.destination.service.response_time.count, total_failures = CASE(event.outcome == "failure", 1, 0) * span.destination.service.response_time.count + | STATS + avg_throughput = AVG(span.destination.service.response_time.count), + avg_latency = AVG(total_response_time), + failure_rate = AVG(total_failures) + BY span.destination.service.resource`, + }); + }); + + it('trace duration', async () => { + await evaluateEsqlQuery({ + question: + 'My APM data is in `traces-apm*`. What’s the average for `transaction.duration.us` per service over the last hour?', + expected: `FROM traces-apm* + | WHERE @timestamp > NOW() - 1 hour + | STATS AVG(transaction.duration.us) BY service.name`, + }); + }); + + after(async () => { + await synthtraceEsClients.apmSynthtraceEsClient.clean(); + }); + }); +}); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts deleted file mode 100644 index c55df6e08849b2..00000000000000 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/scenarios/esql/index.ts +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { last } from 'lodash'; -import { MessageRole } from '../../../../common'; -import { EvaluationFunction } from '../../types'; - -function extractEsqlQuery(response: string) { - return response.match(/```esql([\s\S]*?)```/)?.[1]; -} - -function createEsqlQueryEvaluation({ - question, - expected, - criteria = [], - execute = true, -}: { - question: string; - expected?: string; - criteria?: string[]; - execute?: boolean; -}): EvaluationFunction { - return async ({ chatClient }) => { - let conversation = await chatClient.complete(question); - - const esqlQuery = extractEsqlQuery(last(conversation.messages)?.content || ''); - - if (esqlQuery && execute) { - conversation = await chatClient.complete( - conversation.conversationId!, - conversation.messages.concat({ - content: '', - role: MessageRole.Assistant, - function_call: { - name: 'execute_query', - arguments: JSON.stringify({ - query: esqlQuery, - }), - trigger: MessageRole.User, - }, - }) - ); - } - - const evaluation = await chatClient.evaluate(conversation, [ - ...(expected - ? [ - `Returns a ES|QL query that is functionally equivalent to: - ${expected}`, - ] - : []), - ...(execute && expected ? [`The query successfully executed without an error`] : []), - ...criteria, - ]); - - return evaluation; - }; -} - -export const metricsApmQuery = createEsqlQueryEvaluation({ - question: - 'I want to see a query for metrics-apm*, filtering on metricset.name:transaction and metricset.interval:1m, showing the average duration (via transaction.duration.histogram), in 50 buckets.', - expected: `FROM metrics-apm* - | WHERE metricset.name == "transaction" AND metricset.interval == "1m" - | EVAL bucket = AUTO_BUCKET(@timestamp, 50, , ) - | STATS avg_duration = AVG(transaction.duration.histogram) BY bucket`, -}); - -export const packetbeatUniqueDomainsQuery = createEsqlQueryEvaluation({ - question: - 'For standard Elastic ECS compliant packetbeat data view, create an ES|QL query that shows the top 10 unique domains by doc count', - expected: `FROM packetbeat-* - | STATS doc_count = COUNT(destination.domain) BY destination.domain - | SORT doc_count DESC - | LIMIT 10`, -}); - -export const fiveEarliestEmployeesQuery = createEsqlQueryEvaluation({ - question: - 'From employees, I want to see the 5 earliest employees (hire_date), I want to display only the month and the year that they were hired in and their employee number (emp_no). Format the date as e.g. "September 2019".', - expected: `FROM employees - | EVAL hire_date_formatted = DATE_FORMAT(hire_date, ""MMMM yyyy"") - | SORT hire_date - | KEEP emp_no, hire_date_formatted - | LIMIT 5`, - execute: false, -}); - -export const employeesWithPaginationQuery = createEsqlQueryEvaluation({ - question: - 'From employees, I want to sort the documents by salary, and then return 10 results per page, and then see the second page', - criteria: ['The assistant should mention that pagination is currently not supported in ES|QL'], -}); - -export const logsAvgCpuQuery = createEsqlQueryEvaluation({ - question: - 'My logs data (ECS) is in `logs-*`. Show me a query that gets the average CPU per host, limit it to the top 10 results, in 1m buckets, and only include the last 15m. ', - expected: `FROM logs-* - | WHERE @timestamp >= NOW() - 15 minutes - | EVAL bucket = DATE_TRUNC(1 minute, @timestamp) - | STATS avg_cpu = AVG(system.cpu.total.norm.pct) BY bucket, host.name - | LIMIT 10`, -}); - -export const apmServiceInventoryQuery = createEsqlQueryEvaluation({ - question: - 'I want to show a list of services with APM data. My data is in `traces-apm*`. I want to show the average transaction duration, the success rate (by dividing event.outcome:failure by event.outcome:failure+success), and total amount of requests. As a time range, select the last 24 hours. Use ES|QL.', - expected: `FROM traces-apm* - | WHERE @timestamp >= NOW() - 24 hours - | EVAL successful = CASE(event.outcome == "success", 1, 0), - failed = CASE(event.outcome == "failure", 1, 0) - | STATS success_rate = AVG(successful), - avg_duration = AVG(transaction.duration), - total_requests = COUNT(transaction.id) BY service.name`, -}); - -export const metricbeatCpuQuery = createEsqlQueryEvaluation({ - question: `from \`metricbeat*\`, using ES|QL, I want to see the percentage of CPU time normalized by the number of CPU cores, broken down by hostname. the fields are system.cpu.user.pct, system.cpu.system.pct, and system.cpu.cores`, - expected: `FROM metricbeat* - | EVAL cpu_pct_normalized = (system.cpu.user.pct + system.cpu.system.pct) / system.cpu.cores - | STATS AVG(cpu_pct_normalized) BY host.name`, -}); - -export const postgresDurationQuery = createEsqlQueryEvaluation({ - question: - 'extract the query duration from postgres log messages in postgres-logs*, using ECS fields, and calculate the avg', - expected: `FROM postgres-logs - | DISSECT message "%{} duration: %{query_duration} ms" - | EVAL query_duration_num = TO_DOUBLE(query_duration) - | STATS avg_duration = AVG(query_duration_num)`, -}); - -export const apmExitSpanQuery = createEsqlQueryEvaluation({ - question: `I've got APM data in \`metrics-apm\`. Filter on \`metricset.name:service_destination\` and the last 24 hours. Break down by span.destination.service.resource. Each document contains the count of total events (span.destination.service.response_time.count) for that document's interval and the total amount of latency (span.destination.service.response_time.sum.us). A document either contains an aggregate of failed events (event.outcome:success) or failed events (event.outcome:failure). A single document might represent multiple failures or successes, depending on the value of span.destination.service.response_time.count. For each value of span.destination.service.resource, give me the average throughput, latency per request, and failure rate, as a value between 0 and 1. Just show me the query.`, - expected: `FROM metrics-apm - | WHERE metricset.name == "service_destination" AND @timestamp >= NOW() - 24 hours - | EVAL total_response_time = span.destination.service.response_time.sum.us / span.destination.service.response_time.count, total_failures = CASE(event.outcome == "failure", 1, 0) * span.destination.service.response_time.count - | STATS - avg_throughput = AVG(span.destination.service.response_time.count), - avg_latency = AVG(total_response_time), - failure_rate = AVG(total_failures) - BY span.destination.service.resource`, -}); - -export const highCardinalityLogsErrorQuery = createEsqlQueryEvaluation({ - question: `i have logs in high-cardinality-data-fake_stack.admin-console-* . errors are found when log.level contais the value ERROR. generate a query to obtain the error rate as a percetage of the total logs per day for the last 7 days`, - expected: `FROM high-cardinality-data-fake_stack.admin-console-* - | WHERE @timestamp >= NOW() - 7 days - | EVAL error = CASE(log.level == "ERROR", 1, 0), total = 1 - | EVAL bucket = DATE_TRUNC(1 day, @timestamp) - | STATS total_errors = SUM(error), total_logs = SUM(total) BY bucket - | EVAL error_rate = (total_errors / total_logs) * 100`, -}); - -export const nycTaxisDropoffTimeQuery = createEsqlQueryEvaluation({ - question: - 'From `nyc_taxis`, give me a query that shows the top 10 results where the drop off time was between 6am and 10am.', - expected: `FROM nyc_taxis - | WHERE DATE_EXTRACT(drop_off_time, "hour") >= 6 AND DATE_EXTRACT(drop_off_time, "hour") < 10 - | LIMIT 10`, -}); - -export const apmTraceDurationQuery = createEsqlQueryEvaluation({ - question: - 'My APM data is in `traces-apm*`. What’s the average for `transaction.duration.us` per service over the last hour?', - expected: `FROM traces-apm* - | WHERE @timestamp > NOW() - 1 hour - | STATS AVG(transaction.duration.us) BY service.name`, -}); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/services/index.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/services/index.ts new file mode 100644 index 00000000000000..5ecf0c48ddec38 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/services/index.ts @@ -0,0 +1,43 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { Client } from '@elastic/elasticsearch'; +import type { ChatClient, KibanaClient } from '../kibana_client'; +import type { SynthtraceEsClients } from '../setup_synthtrace'; + +function createErrorThrowingProxy(name: string): any { + return new Proxy( + {}, + { + get: () => { + throw new Error(`${name} has not been instantiated yet`); + }, + set: () => { + throw new Error(`${name} has not been instantiated yet`); + }, + } + ); +} + +export let chatClient: ChatClient = createErrorThrowingProxy('ChatClient'); +export let esClient: Client = createErrorThrowingProxy('esClient'); +export let kibanaClient: KibanaClient = createErrorThrowingProxy('kibanaClient'); + +export let synthtraceEsClients: SynthtraceEsClients = + createErrorThrowingProxy('synthtraceEsClients'); + +export const initServices = (services: { + chatClient: ChatClient; + esClient: Client; + kibanaClient: KibanaClient; + synthtraceEsClients: SynthtraceEsClients; +}) => { + chatClient = services.chatClient; + esClient = services.esClient; + kibanaClient = services.kibanaClient; + synthtraceEsClients = services.synthtraceEsClients; +}; diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/setup_synthtrace.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/setup_synthtrace.ts new file mode 100644 index 00000000000000..f6b3180541ee24 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/setup_synthtrace.ts @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { + ApmSynthtraceEsClient, + InfraSynthtraceEsClient, + LogsSynthtraceEsClient, + ApmSynthtraceKibanaClient, +} from '@kbn/apm-synthtrace'; +import { ToolingLog } from '@kbn/tooling-log'; +import { isPromise } from 'util/types'; +import { Logger } from '@kbn/apm-synthtrace/src/lib/utils/create_logger'; +import { Client } from '@elastic/elasticsearch'; + +export interface SynthtraceEsClients { + apmSynthtraceEsClient: ApmSynthtraceEsClient; + infraSynthtraceEsClient: InfraSynthtraceEsClient; + logsSynthtraceEsClient: LogsSynthtraceEsClient; +} + +export async function setupSynthtrace({ + log, + client, + target, +}: { + log: ToolingLog; + client: Client; + target: string; +}): Promise { + const logger: Logger = { + debug: (...args) => log.debug(...args), + info: (...args) => log.info(...args), + error: (...args) => log.error(args.map((arg) => arg.toString()).join(' ')), + perf: (name, cb) => { + const now = performance.now(); + + const result = cb(); + + function measure() { + const after = performance.now(); + log.debug(`[${name}] took ${after - now} ms`); + } + + if (isPromise(result)) { + result.finally(measure); + } else { + measure(); + } + + return result; + }, + }; + const kibanaClient = new ApmSynthtraceKibanaClient({ + target, + logger, + }); + + const latestVersion = await kibanaClient.fetchLatestApmPackageVersion(); + + await kibanaClient.installApmPackage(latestVersion); + + const apmSynthtraceEsClient = new ApmSynthtraceEsClient({ + logger, + client, + version: latestVersion, + refreshAfterIndex: true, + }); + + const logsSynthtraceEsClient = new LogsSynthtraceEsClient({ + client, + logger, + refreshAfterIndex: true, + }); + + const infraSynthtraceEsClient = new InfraSynthtraceEsClient({ + client, + logger, + refreshAfterIndex: true, + }); + + return { + apmSynthtraceEsClient, + logsSynthtraceEsClient, + infraSynthtraceEsClient, + }; +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts index 3ee8c2eaebe99d..88182c480ff216 100644 --- a/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts +++ b/x-pack/plugins/observability_ai_assistant/scripts/evaluation/types.ts @@ -8,16 +8,19 @@ import type { Client } from '@elastic/elasticsearch'; import { Message } from '../../common'; import { KibanaClient } from './kibana_client'; +import { SynthtraceEsClients } from './setup_synthtrace'; export interface ScenarioOptions { esClient: Client; kibanaClient: KibanaClient; chatClient: ReturnType; + synthtraceClients: SynthtraceEsClients; } export interface EvaluationResult { conversationId?: string; messages: Array; + passed: boolean; scores: Array<{ criterion: string; reasoning: string; diff --git a/x-pack/plugins/observability_ai_assistant/tsconfig.json b/x-pack/plugins/observability_ai_assistant/tsconfig.json index bf6f8ae0ad30b7..31e5d0e6fe9bc7 100644 --- a/x-pack/plugins/observability_ai_assistant/tsconfig.json +++ b/x-pack/plugins/observability_ai_assistant/tsconfig.json @@ -56,7 +56,10 @@ "@kbn/core-analytics-browser", "@kbn/core-http-browser", "@kbn/security-plugin-types-common", - "@kbn/ml-plugin" + "@kbn/ml-plugin", + "@kbn/expect", + "@kbn/apm-synthtrace-client", + "@kbn/apm-synthtrace" ], "exclude": ["target/**/*"] }