Skip to content

Commit a47b38d

Browse files
[Monitoring] Thread pool rejections alert (#79433) (#82157)
* Thread pool rejections first draft * Split search and write rejections to separate alerts * Code review feedback * Optimized page loading and bundle size * Increased monitoring bundle limit * Removed server app import into the frontend * Fixed tests and bundle size Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> # Conflicts: # packages/kbn-optimizer/limits.yml
1 parent 86bf908 commit a47b38d

File tree

70 files changed

+1149
-395
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+1149
-395
lines changed

packages/kbn-optimizer/limits.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pageLoadAssetSize:
5454
mapsLegacy: 116961
5555
mapsLegacyLicensing: 20214
5656
ml: 82187
57-
monitoring: 268758
57+
monitoring: 50000
5858
navigation: 37413
5959
newsfeed: 42228
6060
observability: 89709

x-pack/plugins/monitoring/common/constants.ts

Lines changed: 168 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
* you may not use this file except in compliance with the Elastic License.
55
*/
66

7+
import { i18n } from '@kbn/i18n';
8+
import { CommonAlertParamDetail } from './types/alerts';
9+
import { AlertParamType } from './enums';
10+
711
/**
812
* Helper string to add as a tag in every logging call
913
*/
@@ -215,15 +219,6 @@ export const REPORTING_SYSTEM_ID = 'reporting';
215219
*/
216220
export const TELEMETRY_COLLECTION_INTERVAL = 86400000;
217221

218-
/**
219-
* We want to slowly rollout the migration from watcher-based cluster alerts to
220-
* kibana alerts and we only want to enable the kibana alerts once all
221-
* watcher-based cluster alerts have been migrated so this flag will serve
222-
* as the only way to see the new UI and actually run Kibana alerts. It will
223-
* be false until all alerts have been migrated, then it will be removed
224-
*/
225-
export const KIBANA_CLUSTER_ALERTS_ENABLED = false;
226-
227222
/**
228223
* The prefix for all alert types used by monitoring
229224
*/
@@ -238,6 +233,168 @@ export const ALERT_KIBANA_VERSION_MISMATCH = `${ALERT_PREFIX}alert_kibana_versio
238233
export const ALERT_LOGSTASH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_logstash_version_mismatch`;
239234
export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`;
240235
export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`;
236+
export const ALERT_THREAD_POOL_SEARCH_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_search_rejections`;
237+
export const ALERT_THREAD_POOL_WRITE_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_write_rejections`;
238+
239+
/**
240+
* Legacy alerts details/label for server and public use
241+
*/
242+
export const LEGACY_ALERT_DETAILS = {
243+
[ALERT_CLUSTER_HEALTH]: {
244+
label: i18n.translate('xpack.monitoring.alerts.clusterHealth.label', {
245+
defaultMessage: 'Cluster health',
246+
}),
247+
},
248+
[ALERT_ELASTICSEARCH_VERSION_MISMATCH]: {
249+
label: i18n.translate('xpack.monitoring.alerts.elasticsearchVersionMismatch.label', {
250+
defaultMessage: 'Elasticsearch version mismatch',
251+
}),
252+
},
253+
[ALERT_KIBANA_VERSION_MISMATCH]: {
254+
label: i18n.translate('xpack.monitoring.alerts.kibanaVersionMismatch.label', {
255+
defaultMessage: 'Kibana version mismatch',
256+
}),
257+
},
258+
[ALERT_LICENSE_EXPIRATION]: {
259+
label: i18n.translate('xpack.monitoring.alerts.licenseExpiration.label', {
260+
defaultMessage: 'License expiration',
261+
}),
262+
},
263+
[ALERT_LOGSTASH_VERSION_MISMATCH]: {
264+
label: i18n.translate('xpack.monitoring.alerts.logstashVersionMismatch.label', {
265+
defaultMessage: 'Logstash version mismatch',
266+
}),
267+
},
268+
[ALERT_NODES_CHANGED]: {
269+
label: i18n.translate('xpack.monitoring.alerts.nodesChanged.label', {
270+
defaultMessage: 'Nodes changed',
271+
}),
272+
},
273+
};
274+
275+
/**
276+
* Alerts details/label for server and public use
277+
*/
278+
export const ALERT_DETAILS = {
279+
[ALERT_CPU_USAGE]: {
280+
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.label', {
281+
defaultMessage: 'CPU Usage',
282+
}),
283+
paramDetails: {
284+
threshold: {
285+
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.threshold.label', {
286+
defaultMessage: `Notify when CPU is over`,
287+
}),
288+
type: AlertParamType.Percentage,
289+
} as CommonAlertParamDetail,
290+
duration: {
291+
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.duration.label', {
292+
defaultMessage: `Look at the average over`,
293+
}),
294+
type: AlertParamType.Duration,
295+
} as CommonAlertParamDetail,
296+
},
297+
},
298+
[ALERT_DISK_USAGE]: {
299+
paramDetails: {
300+
threshold: {
301+
label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.threshold.label', {
302+
defaultMessage: `Notify when disk capacity is over`,
303+
}),
304+
type: AlertParamType.Percentage,
305+
},
306+
duration: {
307+
label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.duration.label', {
308+
defaultMessage: `Look at the average over`,
309+
}),
310+
type: AlertParamType.Duration,
311+
},
312+
},
313+
label: i18n.translate('xpack.monitoring.alerts.diskUsage.label', {
314+
defaultMessage: 'Disk Usage',
315+
}),
316+
},
317+
[ALERT_MEMORY_USAGE]: {
318+
paramDetails: {
319+
threshold: {
320+
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.threshold.label', {
321+
defaultMessage: `Notify when memory usage is over`,
322+
}),
323+
type: AlertParamType.Percentage,
324+
},
325+
duration: {
326+
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.duration.label', {
327+
defaultMessage: `Look at the average over`,
328+
}),
329+
type: AlertParamType.Duration,
330+
},
331+
},
332+
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.label', {
333+
defaultMessage: 'Memory Usage (JVM)',
334+
}),
335+
},
336+
[ALERT_MISSING_MONITORING_DATA]: {
337+
paramDetails: {
338+
duration: {
339+
label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.duration.label', {
340+
defaultMessage: `Notify if monitoring data is missing for the last`,
341+
}),
342+
type: AlertParamType.Duration,
343+
} as CommonAlertParamDetail,
344+
limit: {
345+
label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.limit.label', {
346+
defaultMessage: `looking back`,
347+
}),
348+
type: AlertParamType.Duration,
349+
} as CommonAlertParamDetail,
350+
},
351+
label: i18n.translate('xpack.monitoring.alerts.missingData.label', {
352+
defaultMessage: 'Missing monitoring data',
353+
}),
354+
},
355+
[ALERT_THREAD_POOL_SEARCH_REJECTIONS]: {
356+
paramDetails: {
357+
threshold: {
358+
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
359+
defaultMessage: `Notify when {type} rejection count is over`,
360+
values: { type: 'search' },
361+
}),
362+
type: AlertParamType.Number,
363+
},
364+
duration: {
365+
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
366+
defaultMessage: `In the last`,
367+
}),
368+
type: AlertParamType.Duration,
369+
},
370+
},
371+
label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
372+
defaultMessage: 'Thread pool {type} rejections',
373+
values: { type: 'search' },
374+
}),
375+
},
376+
[ALERT_THREAD_POOL_WRITE_REJECTIONS]: {
377+
paramDetails: {
378+
threshold: {
379+
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
380+
defaultMessage: `Notify when {type} rejection count is over`,
381+
values: { type: 'write' },
382+
}),
383+
type: AlertParamType.Number,
384+
},
385+
duration: {
386+
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
387+
defaultMessage: `In the last`,
388+
}),
389+
type: AlertParamType.Duration,
390+
},
391+
},
392+
label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
393+
defaultMessage: 'Thread pool {type} rejections',
394+
values: { type: 'write' },
395+
}),
396+
},
397+
};
241398

242399
/**
243400
* A listing of all alert types
@@ -253,6 +410,8 @@ export const ALERTS = [
253410
ALERT_LOGSTASH_VERSION_MISMATCH,
254411
ALERT_MEMORY_USAGE,
255412
ALERT_MISSING_MONITORING_DATA,
413+
ALERT_THREAD_POOL_SEARCH_REJECTIONS,
414+
ALERT_THREAD_POOL_WRITE_REJECTIONS,
256415
];
257416

258417
/**

x-pack/plugins/monitoring/common/enums.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export enum AlertMessageTokenType {
2525
export enum AlertParamType {
2626
Duration = 'duration',
2727
Percentage = 'percentage',
28+
Number = 'number',
2829
}
2930

3031
export enum SetupModeFeature {

x-pack/plugins/monitoring/common/types.ts

Lines changed: 0 additions & 53 deletions
This file was deleted.

x-pack/plugins/monitoring/server/alerts/types.d.ts renamed to x-pack/plugins/monitoring/common/types/alerts.ts

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,70 @@
33
* or more contributor license agreements. Licensed under the Elastic License;
44
* you may not use this file except in compliance with the Elastic License.
55
*/
6-
import { AlertMessageTokenType, AlertSeverity } from '../../common/enums';
7-
import { AlertInstanceState as BaseAlertInstanceState } from '../../../alerts/server';
6+
7+
import { Alert } from '../../../alerts/common';
8+
import { AlertParamType, AlertMessageTokenType, AlertSeverity } from '../enums';
9+
10+
export interface CommonBaseAlert {
11+
type: string;
12+
label: string;
13+
paramDetails: CommonAlertParamDetails;
14+
rawAlert: Alert;
15+
isLegacy: boolean;
16+
}
17+
18+
export interface CommonAlertStatus {
19+
exists: boolean;
20+
enabled: boolean;
21+
states: CommonAlertState[];
22+
alert: CommonBaseAlert;
23+
}
24+
25+
export interface CommonAlertState {
26+
firing: boolean;
27+
state: any;
28+
meta: any;
29+
}
30+
31+
export interface CommonAlertFilter {
32+
nodeUuid?: string;
33+
}
34+
35+
export interface CommonAlertNodeUuidFilter extends CommonAlertFilter {
36+
nodeUuid: string;
37+
}
38+
39+
export interface CommonAlertStackProductFilter extends CommonAlertFilter {
40+
stackProduct: string;
41+
}
42+
43+
export interface CommonAlertParamDetail {
44+
label: string;
45+
type?: AlertParamType;
46+
}
47+
48+
export interface CommonAlertParamDetails {
49+
[name: string]: CommonAlertParamDetail | undefined;
50+
}
51+
52+
export interface CommonAlertParams {
53+
[name: string]: string | number;
54+
}
55+
56+
export interface ThreadPoolRejectionsAlertParams {
57+
threshold: number;
58+
duration: string;
59+
}
860

961
export interface AlertEnableAction {
1062
id: string;
1163
config: { [key: string]: any };
1264
}
1365

1466
export interface AlertInstanceState {
15-
alertStates: Array<AlertState | AlertCpuUsageState | AlertDiskUsageState>;
67+
alertStates: Array<
68+
AlertState | AlertCpuUsageState | AlertDiskUsageState | AlertThreadPoolRejectionsState
69+
>;
1670
[x: string]: unknown;
1771
}
1872

@@ -46,6 +100,13 @@ export interface AlertMemoryUsageState extends AlertNodeState {
46100
memoryUsage: number;
47101
}
48102

103+
export interface AlertThreadPoolRejectionsState extends AlertState {
104+
rejectionCount: number;
105+
type: string;
106+
nodeId: string;
107+
nodeName?: string;
108+
}
109+
49110
export interface AlertUiState {
50111
isFiring: boolean;
51112
severity: AlertSeverity;
@@ -100,6 +161,14 @@ export interface AlertCpuUsageNodeStats extends AlertNodeStats {
100161
containerQuota: number;
101162
}
102163

164+
export interface AlertThreadPoolRejectionsStats {
165+
clusterUuid: string;
166+
nodeId: string;
167+
nodeName: string;
168+
rejectionCount: number;
169+
ccs?: string;
170+
}
171+
103172
export interface AlertDiskUsageNodeStats extends AlertNodeStats {
104173
diskUsage: number;
105174
}
@@ -121,7 +190,7 @@ export interface AlertData {
121190
instanceKey: string;
122191
clusterUuid: string;
123192
ccs?: string;
124-
shouldFire: boolean;
193+
shouldFire?: boolean;
125194
severity: AlertSeverity;
126195
meta: any;
127196
}

0 commit comments

Comments
 (0)