diff --git a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts index 5efd2d7a491526..46cf58d838eccf 100644 --- a/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts +++ b/x-pack/plugins/alerting/server/usage/alerting_usage_collector.ts @@ -205,6 +205,9 @@ export function createAlertingUsageCollector( count_rules_with_tags: 0, count_rules_snoozed: 0, count_rules_muted: 0, + count_mw_total: 0, + count_mw_with_repeat_toggle_on: 0, + count_mw_with_filter_alert_toggle_on: 0, count_rules_with_muted_alerts: 0, count_connector_types_by_consumers: {}, count_rules_by_execution_status_per_day: {}, @@ -289,6 +292,9 @@ export function createAlertingUsageCollector( count_rules_by_notify_when: byNotifyWhenSchema, count_rules_snoozed: { type: 'long' }, count_rules_muted: { type: 'long' }, + count_mw_total: { type: 'long' }, + count_mw_with_repeat_toggle_on: { type: 'long' }, + count_mw_with_filter_alert_toggle_on: { type: 'long' }, count_rules_with_muted_alerts: { type: 'long' }, count_connector_types_by_consumers: { DYNAMIC_KEY: { DYNAMIC_KEY: { type: 'long' } } }, count_rules_by_execution_status_per_day: byStatusPerDaySchema, diff --git a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts index f29602458fd505..7c06e9867dae30 100644 --- a/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts +++ b/x-pack/plugins/alerting/server/usage/lib/get_telemetry_from_kibana.test.ts @@ -6,11 +6,97 @@ */ import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks'; -import { getTotalCountAggregations, getTotalCountInUse } from './get_telemetry_from_kibana'; +import { + getTotalCountAggregations, + getTotalCountInUse, + getMWTelemetry, +} from './get_telemetry_from_kibana'; +import { savedObjectsClientMock } from '@kbn/core/server/mocks'; +import { 
const elasticsearch = elasticsearchServiceMock.createStart();
const esClient = elasticsearch.client.asInternalUser;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();
const savedObjectsClient = savedObjectsClientMock.create() as unknown as ISavedObjectsRepository;
const thrownError = new Error('Fail');

// One page of maintenance-window saved objects, as yielded by the point-in-time finder:
//   id 1 -> no `interval` in rRule, scopedQuery null
//   id 2 -> `interval` present,     scopedQuery set
//   id 3 -> `interval` present,     scopedQuery null
const mockedResponse = {
  saved_objects: [
    {
      id: '1',
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      attributes: {
        title: 'test_rule_1',
        enabled: true,
        duration: 1800000,
        expirationDate: '2025-09-09T13:13:07.824Z',
        events: [],
        rRule: {
          dtstart: '2024-09-09T13:13:02.054Z',
          tzid: 'Europe/Stockholm',
          freq: 0,
          count: 1,
        },
        createdBy: null,
        updatedBy: null,
        createdAt: '2024-09-09T13:13:07.825Z',
        updatedAt: '2024-09-09T13:13:07.825Z',
        scopedQuery: null,
      },
    },
    {
      id: '2',
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      attributes: {
        title: 'test_rule_2',
        enabled: true,
        duration: 1800000,
        expirationDate: '2025-09-09T13:13:07.824Z',
        events: [],
        rRule: {
          dtstart: '2024-09-09T13:13:02.054Z',
          tzid: 'Europe/Stockholm',
          freq: 3,
          interval: 1,
          byweekday: ['SU'],
        },
        createdBy: null,
        updatedBy: null,
        createdAt: '2024-09-09T13:13:07.825Z',
        updatedAt: '2024-09-09T13:13:07.825Z',
        scopedQuery: {
          filters: [],
          kql: 'kibana.alert.job_errors_results.job_id : * ',
          dsl: '{"bool":{"must":[],"filter":[{"bool":{"should":[{"exists":{"field":"kibana.alert.job_errors_results.job_id"}}],"minimum_should_match":1}}],"should":[],"must_not":[]}}',
        },
      },
    },
    {
      id: '3',
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      attributes: {
        title: 'test_rule_3',
        enabled: true,
        duration: 1800000,
        expirationDate: '2025-09-09T13:13:07.824Z',
        events: [],
        rRule: {
          dtstart: '2024-09-09T13:13:02.054Z',
          tzid: 'Europe/Stockholm',
          freq: 3,
          interval: 1,
          byweekday: ['TU'],
        },
        createdBy: null,
        updatedBy: null,
        createdAt: '2024-09-09T13:13:07.825Z',
        updatedAt: '2024-09-09T13:13:07.825Z',
        scopedQuery: null,
      },
    },
  ],
};

describe('getMWTelemetry', () => {
  // Options getMWTelemetry is expected to hand to createPointInTimeFinder in every scenario.
  const expectedFinderOptions = {
    type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
    namespaces: ['*'],
    perPage: 100,
    fields: ['rRule', 'scopedQuery'],
  };

  // Installs a fake point-in-time finder whose find() is backed by the given async generator
  // and returns its close() mock so a test can assert on it.
  function mockPointInTimeFinder(find: () => AsyncGenerator<unknown>) {
    const close = jest.fn();
    savedObjectsClient.createPointInTimeFinder = jest.fn().mockReturnValue({
      close,
      find: jest.fn().mockImplementation(find),
    });
    return { close };
  }

  test('should return MW telemetry', async () => {
    const { close } = mockPointInTimeFinder(async function* () {
      yield mockedResponse;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);
    expect(close).toHaveBeenCalledTimes(1);
    expect(telemetry).toStrictEqual({
      count_mw_total: 3,
      count_mw_with_repeat_toggle_on: 2,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  });

  // getMWTelemetry does not throw: a finder failure is reported through hasErrors/errorMessage.
  test('should return zeroed counts and log a warning when the finder fails', async () => {
    mockPointInTimeFinder(async function* () {
      throw thrownError;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);
    expect(telemetry).toStrictEqual({
      count_mw_total: 0,
      count_mw_with_repeat_toggle_on: 0,
      count_mw_with_filter_alert_toggle_on: 0,
      hasErrors: true,
      errorMessage: 'Fail',
    });
    expect(logger.warn).toHaveBeenCalled();
    const loggerCall = logger.warn.mock.calls[0][0];
    const loggerMeta = logger.warn.mock.calls[0][1];
    expect(loggerCall).toBe('Error executing alerting telemetry task: getTotalMWCount - {}');
    expect(loggerMeta?.tags).toEqual(['alerting', 'telemetry-failed']);
    expect(loggerMeta?.error?.stack_trace).toBeDefined();
  });

  test('should stop on MW max limit count', async () => {
    mockPointInTimeFinder(async function* () {
      yield mockedResponse;
    });

    const telemetry = await getMWTelemetry({
      savedObjectsClient,
      logger,
      maxDocuments: 1,
    });

    expect(savedObjectsClient.createPointInTimeFinder).toHaveBeenCalledWith(expectedFinderOptions);
    // The limit is compared with `>` before a document is counted, so maxDocuments: 1
    // lets the first two fixtures through (ids 1 and 2) before the scan stops.
    expect(telemetry).toStrictEqual({
      count_mw_total: 2,
      count_mw_with_repeat_toggle_on: 1,
      count_mw_with_filter_alert_toggle_on: 1,
      hasErrors: false,
    });
  });
});
/** Options accepted by getMWTelemetry. */
interface MWOpts {
  savedObjectsClient: ISavedObjectsRepository;
  logger: Logger;
  /** Scan limit; defaults to TELEMETRY_MW_COUNT_LIMIT when omitted. */
  maxDocuments?: number;
}

/** Maintenance-window counters plus the error flags every telemetry getter in this task reports. */
type GetMWTelemetryResults = Pick<
  AlertingUsage,
  'count_mw_total' | 'count_mw_with_repeat_toggle_on' | 'count_mw_with_filter_alert_toggle_on'
> & {
  errorMessage?: string;
  hasErrors: boolean;
};

const TELEMETRY_MW_COUNT_LIMIT = 10000;

/**
 * Collects maintenance-window (MW) usage counters by paging through MW saved objects in all
 * namespaces with a point-in-time finder.
 *
 * @param savedObjectsClient repository used to create the point-in-time finder
 * @param logger receives a warning when collection fails
 * @param maxDocuments scan limit (defaults to TELEMETRY_MW_COUNT_LIMIT)
 * @returns the total number of scanned MWs, how many have an `interval` in their rRule
 *          ("Repeat" toggle on) and how many have a non-null `scopedQuery` ("Filter alerts"
 *          toggle on). This function does not reject: on failure it logs a warning and
 *          resolves with zeroed counters, `hasErrors: true` and `errorMessage`.
 */
export async function getMWTelemetry({
  savedObjectsClient,
  logger,
  maxDocuments = TELEMETRY_MW_COUNT_LIMIT,
}: MWOpts): Promise<GetMWTelemetryResults> {
  try {
    const mwFinder = savedObjectsClient.createPointInTimeFinder<MaintenanceWindowAttributes>({
      type: MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE,
      namespaces: ['*'],
      perPage: 100,
      fields: ['rRule', 'scopedQuery'],
    });

    let countMWTotal = 0;
    let countMWWithRepeatToggleON = 0;
    let countMWWithFilterAlertToggleON = 0;
    try {
      mwLoop: for await (const response of mwFinder.find()) {
        for (const mwSavedObject of response.saved_objects) {
          // NOTE(review): the limit is compared with `>` before the increment, so up to
          // maxDocuments + 1 objects are counted. Left as-is because the unit test for the
          // limit (maxDocuments: 1 -> count_mw_total: 2) pins this result.
          if (countMWTotal > maxDocuments) break mwLoop;
          countMWTotal += 1;

          const { scopedQuery, rRule } = mwSavedObject.attributes;
          // scopedQuery property will be null if "Filter alerts" toggle will be off
          if (scopedQuery) {
            countMWWithFilterAlertToggleON += 1;
          }
          // interval property will be not in place if "Repeat" toggle will be off;
          // the null check keeps one object without rRule from failing the whole collection
          if (rRule != null && Object.hasOwn(rRule, 'interval')) {
            countMWWithRepeatToggleON += 1;
          }
        }
      }
    } finally {
      // Close the finder on every exit path (exhausted, limit reached, or find() failed).
      // A failure of close() itself is still handled by the outer catch.
      await mwFinder.close();
    }

    return {
      hasErrors: false,
      count_mw_total: countMWTotal,
      count_mw_with_repeat_toggle_on: countMWWithRepeatToggleON,
      count_mw_with_filter_alert_toggle_on: countMWWithFilterAlertToggleON,
    };
  } catch (err) {
    // String(err) instead of err.toString(): the latter throws when a nullish value was thrown.
    const errorMessage = err?.message ? err.message : String(err);
    logger.warn(
      `Error executing alerting telemetry task: getTotalMWCount - ${JSON.stringify(err)}`,
      {
        tags: ['alerting', 'telemetry-failed'],
        error: { stack_trace: err?.stack },
      }
    );
    return {
      hasErrors: true,
      errorMessage,
      count_mw_total: 0,
      count_mw_with_repeat_toggle_on: 0,
      count_mw_with_filter_alert_toggle_on: 0,
    };
  }
}
- if (taskManager) { - scheduleTasks(logger, taskManager).catch(() => {}); // it shouldn't reject, but just in case - } -} - function registerAlertingTelemetryTask( logger: Logger, core: CoreSetup, @@ -58,6 +56,12 @@ function registerAlertingTelemetryTask( }); } +export function scheduleAlertingTelemetry(logger: Logger, taskManager?: TaskManagerStartContract) { + if (taskManager) { + scheduleTasks(logger, taskManager).catch(() => {}); // it shouldn't reject, but just in case + } +} + async function scheduleTasks(logger: Logger, taskManager: TaskManagerStartContract) { try { await taskManager.ensureScheduled({ @@ -93,16 +97,26 @@ export function telemetryTaskRunner( .getStartServices() .then(([coreStart]) => coreStart.savedObjects.getIndexForType(RULE_SAVED_OBJECT_TYPE)); + const getSavedObjectClient = () => + core + .getStartServices() + .then(([coreStart]) => + coreStart.savedObjects.createInternalRepository([MAINTENANCE_WINDOW_SAVED_OBJECT_TYPE]) + ); + return { async run() { const esClient = await getEsClient(); const alertIndex = await getAlertIndex(); + const savedObjectsClient = await getSavedObjectClient(); + return Promise.all([ getTotalCountAggregations({ esClient, alertIndex, logger }), getTotalCountInUse({ esClient, alertIndex, logger }), getExecutionsPerDayCount({ esClient, eventLogIndex, logger }), getExecutionTimeoutsPerDayCount({ esClient, eventLogIndex, logger }), getFailedAndUnrecognizedTasksPerDay({ esClient, taskManagerIndex, logger }), + getMWTelemetry({ logger, savedObjectsClient }), ]) .then( ([ @@ -111,13 +125,15 @@ export function telemetryTaskRunner( dailyExecutionCounts, dailyExecutionTimeoutCounts, dailyFailedAndUnrecognizedTasks, + MWTelemetry, ]) => { const hasErrors = totalCountAggregations.hasErrors || totalInUse.hasErrors || dailyExecutionCounts.hasErrors || dailyExecutionTimeoutCounts.hasErrors || - dailyFailedAndUnrecognizedTasks.hasErrors; + dailyFailedAndUnrecognizedTasks.hasErrors || + MWTelemetry.hasErrors; const errorMessages 
= [ totalCountAggregations.errorMessage, @@ -125,6 +141,7 @@ export function telemetryTaskRunner( dailyExecutionCounts.errorMessage, dailyExecutionTimeoutCounts.errorMessage, dailyFailedAndUnrecognizedTasks.errorMessage, + MWTelemetry.errorMessage, ].filter((message) => message !== undefined); const updatedState: LatestTaskStateSchema = { @@ -147,6 +164,10 @@ export function telemetryTaskRunner( count_rules_by_notify_when: totalCountAggregations.count_rules_by_notify_when, count_rules_snoozed: totalCountAggregations.count_rules_snoozed, count_rules_muted: totalCountAggregations.count_rules_muted, + count_mw_total: MWTelemetry.count_mw_total, + count_mw_with_repeat_toggle_on: MWTelemetry.count_mw_with_repeat_toggle_on, + count_mw_with_filter_alert_toggle_on: + MWTelemetry.count_mw_with_filter_alert_toggle_on, count_rules_with_muted_alerts: totalCountAggregations.count_rules_with_muted_alerts, count_connector_types_by_consumers: totalCountAggregations.count_connector_types_by_consumers, diff --git a/x-pack/plugins/alerting/server/usage/task_state.ts b/x-pack/plugins/alerting/server/usage/task_state.ts index cbcabeb490b847..a9652ee8200a1f 100644 --- a/x-pack/plugins/alerting/server/usage/task_state.ts +++ b/x-pack/plugins/alerting/server/usage/task_state.ts @@ -146,6 +146,9 @@ export const stateSchemaByVersion = { }), count_rules_snoozed: schema.number(), count_rules_muted: schema.number(), + count_mw_total: schema.number(), + count_mw_with_repeat_toggle_on: schema.number(), + count_mw_with_filter_alert_toggle_on: schema.number(), count_rules_with_muted_alerts: schema.number(), count_connector_types_by_consumers: schema.recordOf( schema.string(), @@ -248,6 +251,9 @@ export const emptyState: LatestTaskStateSchema = { }, count_rules_snoozed: 0, count_rules_muted: 0, + count_mw_total: 0, + count_mw_with_repeat_toggle_on: 0, + count_mw_with_filter_alert_toggle_on: 0, count_rules_with_muted_alerts: 0, count_connector_types_by_consumers: {}, count_rules_namespaces: 0, diff 
/**
 * Creates a maintenance window in the given space through the internal alerting API,
 * registers it with `objectRemover`, and resolves with the created window's id.
 *
 * `interval` is added to `r_rule` only when it is truthy; `scopedQuery` is sent as
 * `scoped_query` and defaults to null.
 */
async function createMaintenanceWindow({
  spaceId,
  interval,
  scopedQuery = null,
}: {
  spaceId: string;
  interval?: number;
  scopedQuery?: {
    filters: string[];
    kql: string;
    dsl: string;
  } | null;
}) {
  const url = `${getUrlPrefix(spaceId)}/internal/alerting/rules/maintenance_window`;

  const rRule: {
    dtstart: string;
    tzid: string;
    freq: number;
    count: number;
    interval?: number;
  } = {
    dtstart: new Date().toISOString(),
    tzid: 'UTC',
    freq: 0,
    count: 1,
  };
  if (interval) {
    rRule.interval = interval;
  }

  const payload = {
    title: 'test-maintenance-window',
    duration: 60 * 60 * 1000, // 1 hr
    r_rule: rRule,
    category_ids: ['management'],
    scoped_query: scopedQuery,
  };

  const response = await supertestWithoutAuth
    .post(url)
    .set('kbn-xsrf', 'foo')
    .auth(Superuser.username, Superuser.password)
    .send(payload);

  expect(response.status).to.equal(200);

  const maintenanceWindowId = response.body.id;
  objectRemover.add(spaceId, maintenanceWindowId, 'rules/maintenance_window', 'alerting', true);

  return maintenanceWindowId;
}
expect(telemetry.count_mw_with_repeat_toggle_on).to.equal(3); } it('should retrieve telemetry data in the expected format', async () => { @@ -527,7 +582,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F let actionsTelemetry: any; await retry.try(async () => { const telemetryTask = await es.get({ - id: `task:Actions-actions_telemetry`, + id: 'task:Actions-actions_telemetry', index: '.kibana_task_manager', }); expect(telemetryTask!._source!.task?.status).to.be('idle'); @@ -550,7 +605,7 @@ export default function createAlertingAndActionsTelemetryTests({ getService }: F let alertingTelemetry: any; await retry.try(async () => { const telemetryTask = await es.get({ - id: `task:Alerting-alerting_telemetry`, + id: 'task:Alerting-alerting_telemetry', index: '.kibana_task_manager', }); expect(telemetryTask!._source!.task?.status).to.be('idle');