Skip to content

Commit 5218806

Browse files
[Alerting] Handle when an Alerting Task fails due to its Alert object being deleted mid flight (#63093) (#63564)
Detects if a task run failed due to the task SO being deleted mid flight and if so writes debug logs instead of warnings. Detects if an Alerting task run failed due to the alert SO being deleted mid flight of the task and if so ensures the task doesn't reschedule itself (as it usually would with other types of tasks). Ensures that the operation of deleting or disabling an Alert won't fail if it fails to delete an already deleted task (a task might preemptively self delete if its underlying alert object was deleted, even if the overall delete operation wasn't deleted). Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
1 parent 79cff4a commit 5218806

File tree

12 files changed

+240
-21
lines changed

12 files changed

+240
-21
lines changed

x-pack/plugins/alerting/server/alerts_client.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import {
3434
import { EncryptedSavedObjectsPluginStart } from '../../../plugins/encrypted_saved_objects/server';
3535
import { TaskManagerStartContract } from '../../../plugins/task_manager/server';
3636
import { taskInstanceToAlertTaskInstance } from './task_runner/alert_task_instance';
37+
import { deleteTaskIfItExists } from './lib/delete_task_if_it_exists';
3738

3839
type NormalizedAlertAction = Omit<AlertAction, 'actionTypeId'>;
3940
export type CreateAPIKeyResult =
@@ -268,7 +269,7 @@ export class AlertsClient {
268269
const removeResult = await this.savedObjectsClient.delete('alert', id);
269270

270271
await Promise.all([
271-
taskIdToRemove ? this.taskManager.remove(taskIdToRemove) : null,
272+
taskIdToRemove ? deleteTaskIfItExists(this.taskManager, taskIdToRemove) : null,
272273
apiKeyToInvalidate ? this.invalidateApiKey({ apiKey: apiKeyToInvalidate }) : null,
273274
]);
274275

@@ -510,7 +511,9 @@ export class AlertsClient {
510511
);
511512

512513
await Promise.all([
513-
attributes.scheduledTaskId ? this.taskManager.remove(attributes.scheduledTaskId) : null,
514+
attributes.scheduledTaskId
515+
? deleteTaskIfItExists(this.taskManager, attributes.scheduledTaskId)
516+
: null,
514517
apiKeyToInvalidate ? this.invalidateApiKey({ apiKey: apiKeyToInvalidate }) : null,
515518
]);
516519
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
import uuid from 'uuid';
8+
import { taskManagerMock } from '../../../task_manager/server/mocks';
9+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
10+
import { deleteTaskIfItExists } from './delete_task_if_it_exists';
11+
12+
describe('deleteTaskIfItExists', () => {
13+
test('removes the task by its ID', async () => {
14+
const tm = taskManagerMock.createStart();
15+
const id = uuid.v4();
16+
17+
expect(await deleteTaskIfItExists(tm, id)).toBe(undefined);
18+
19+
expect(tm.remove).toHaveBeenCalledWith(id);
20+
});
21+
22+
test('handles 404 errors caused by the task not existing', async () => {
23+
const tm = taskManagerMock.createStart();
24+
const id = uuid.v4();
25+
26+
tm.remove.mockRejectedValue(SavedObjectsErrorHelpers.createGenericNotFoundError('task', id));
27+
28+
expect(await deleteTaskIfItExists(tm, id)).toBe(undefined);
29+
30+
expect(tm.remove).toHaveBeenCalledWith(id);
31+
});
32+
33+
test('throws if any other errro is caused by task removal', async () => {
34+
const tm = taskManagerMock.createStart();
35+
const id = uuid.v4();
36+
37+
const error = SavedObjectsErrorHelpers.createInvalidVersionError(uuid.v4());
38+
tm.remove.mockRejectedValue(error);
39+
40+
expect(deleteTaskIfItExists(tm, id)).rejects.toBe(error);
41+
42+
expect(tm.remove).toHaveBeenCalledWith(id);
43+
});
44+
});
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
import { TaskManagerStartContract } from '../../../task_manager/server';
7+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
8+
9+
export async function deleteTaskIfItExists(taskManager: TaskManagerStartContract, taskId: string) {
10+
try {
11+
await taskManager.remove(taskId);
12+
} catch (err) {
13+
if (!SavedObjectsErrorHelpers.isNotFoundError(err)) {
14+
throw err;
15+
}
16+
}
17+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
import { isAlertSavedObjectNotFoundError } from './is_alert_not_found_error';
8+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
9+
import uuid from 'uuid';
10+
11+
describe('isAlertSavedObjectNotFoundError', () => {
12+
test('identifies SavedObjects Not Found errors', () => {
13+
const id = uuid.v4();
14+
// ensure the error created by SO parses as a string with the format we expect
15+
expect(
16+
`${SavedObjectsErrorHelpers.createGenericNotFoundError('alert', id)}`.includes(`alert/${id}`)
17+
).toBe(true);
18+
19+
const errorBySavedObjectsHelper = SavedObjectsErrorHelpers.createGenericNotFoundError(
20+
'alert',
21+
id
22+
);
23+
24+
expect(isAlertSavedObjectNotFoundError(errorBySavedObjectsHelper, id)).toBe(true);
25+
});
26+
27+
test('identifies generic errors', () => {
28+
const id = uuid.v4();
29+
expect(isAlertSavedObjectNotFoundError(new Error(`not found`), id)).toBe(false);
30+
});
31+
});
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
8+
9+
export function isAlertSavedObjectNotFoundError(err: Error, alertId: string) {
10+
return SavedObjectsErrorHelpers.isNotFoundError(err) && `${err}`.includes(alertId);
11+
}

x-pack/plugins/alerting/server/task_runner/task_runner.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import { PluginStartContract as ActionsPluginStart } from '../../../actions/serv
1616
import { actionsMock } from '../../../actions/server/mocks';
1717
import { eventLoggerMock } from '../../../event_log/server/event_logger.mock';
1818
import { IEventLogger } from '../../../event_log/server';
19+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
1920

2021
const alertType = {
2122
id: 'test',
@@ -665,4 +666,36 @@ describe('Task Runner', () => {
665666
}
666667
`);
667668
});
669+
670+
test('avoids rescheduling a failed Alert Task Runner when it throws due to failing to fetch the alert', async () => {
671+
savedObjectsClient.get.mockImplementation(() => {
672+
throw SavedObjectsErrorHelpers.createGenericNotFoundError('task', '1');
673+
});
674+
675+
const taskRunner = new TaskRunner(
676+
alertType,
677+
mockedTaskInstance,
678+
taskRunnerFactoryInitializerParams
679+
);
680+
681+
encryptedSavedObjectsPlugin.getDecryptedAsInternalUser.mockResolvedValueOnce({
682+
id: '1',
683+
type: 'alert',
684+
attributes: {
685+
apiKey: Buffer.from('123:abc').toString('base64'),
686+
},
687+
references: [],
688+
});
689+
690+
const runnerResult = await taskRunner.run();
691+
692+
expect(runnerResult).toMatchInlineSnapshot(`
693+
Object {
694+
"runAt": undefined,
695+
"state": Object {
696+
"previousStartedAt": 1970-01-01T00:00:00.000Z,
697+
},
698+
}
699+
`);
700+
});
668701
});

x-pack/plugins/alerting/server/task_runner/task_runner.ts

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ import { taskInstanceToAlertTaskInstance } from './alert_task_instance';
2626
import { AlertInstances } from '../alert_instance/alert_instance';
2727
import { EVENT_LOG_ACTIONS } from '../plugin';
2828
import { IEvent, IEventLogger } from '../../../event_log/server';
29+
import { isAlertSavedObjectNotFoundError } from '../lib/is_alert_not_found_error';
2930

3031
const FALLBACK_RETRY_INTERVAL: IntervalSchedule = { interval: '5m' };
3132

3233
interface AlertTaskRunResult {
3334
state: AlertTaskState;
34-
runAt: Date;
35+
runAt: Date | undefined;
3536
}
3637

3738
interface AlertTaskInstance extends ConcreteTaskInstance {
@@ -328,22 +329,29 @@ export class TaskRunner {
328329
};
329330
},
330331
(err: Error) => {
331-
this.logger.error(`Executing Alert "${alertId}" has resulted in Error: ${err.message}`);
332+
const message = `Executing Alert "${alertId}" has resulted in Error: ${err.message}`;
333+
if (isAlertSavedObjectNotFoundError(err, alertId)) {
334+
this.logger.debug(message);
335+
} else {
336+
this.logger.error(message);
337+
}
332338
return {
333339
...originalState,
334340
previousStartedAt,
335341
};
336342
}
337343
),
338-
runAt: resolveErr<Date, Error>(runAt, () =>
339-
getNextRunAt(
340-
new Date(),
341-
// if we fail at this point we wish to recover but don't have access to the Alert's
342-
// attributes, so we'll use a default interval to prevent the underlying task from
343-
// falling into a failed state
344-
FALLBACK_RETRY_INTERVAL
345-
)
346-
),
344+
runAt: resolveErr<Date | undefined, Error>(runAt, err => {
345+
return isAlertSavedObjectNotFoundError(err, alertId)
346+
? undefined
347+
: getNextRunAt(
348+
new Date(),
349+
// if we fail at this point we wish to recover but don't have access to the Alert's
350+
// attributes, so we'll use a default interval to prevent the underlying task from
351+
// falling into a failed state
352+
FALLBACK_RETRY_INTERVAL
353+
);
354+
}),
347355
};
348356
}
349357
}

x-pack/plugins/oss_telemetry/server/lib/tasks/visualizations/task_runner.test.ts

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
* you may not use this file except in compliance with the Elastic License.
55
*/
66

7-
import moment from 'moment';
87
import {
98
getMockCallWithInternal,
109
getMockConfig,
@@ -13,6 +12,7 @@ import {
1312
} from '../../../test_utils';
1413
import { visualizationsTaskRunner } from './task_runner';
1514
import { TaskInstance } from '../../../../../task_manager/server';
15+
import { getNextMidnight } from '../../get_next_midnight';
1616

1717
describe('visualizationsTaskRunner', () => {
1818
let mockTaskInstance: TaskInstance;
@@ -41,12 +41,6 @@ describe('visualizationsTaskRunner', () => {
4141
});
4242

4343
test('Summarizes visualization response data', async () => {
44-
const getNextMidnight = () =>
45-
moment()
46-
.add(1, 'days')
47-
.startOf('day')
48-
.toDate();
49-
5044
const runner = visualizationsTaskRunner(mockTaskInstance, getMockConfig(), getMockEs());
5145
const result = await runner();
5246

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
import { isTaskSavedObjectNotFoundError } from './is_task_not_found_error';
8+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
9+
import uuid from 'uuid';
10+
11+
describe('isTaskSavedObjectNotFoundError', () => {
12+
test('identifies SavedObjects Not Found errors', () => {
13+
const id = uuid.v4();
14+
// ensure the error created by SO parses as a string with the format we expect
15+
expect(
16+
`${SavedObjectsErrorHelpers.createGenericNotFoundError('task', id)}`.includes(`task/${id}`)
17+
).toBe(true);
18+
19+
const errorBySavedObjectsHelper = SavedObjectsErrorHelpers.createGenericNotFoundError(
20+
'task',
21+
id
22+
);
23+
24+
expect(isTaskSavedObjectNotFoundError(errorBySavedObjectsHelper, id)).toBe(true);
25+
});
26+
27+
test('identifies generic errors', () => {
28+
const id = uuid.v4();
29+
expect(isTaskSavedObjectNotFoundError(new Error(`not found`), id)).toBe(false);
30+
});
31+
});
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
8+
9+
export function isTaskSavedObjectNotFoundError(err: Error, taskId: string) {
10+
return SavedObjectsErrorHelpers.isNotFoundError(err) && `${err}`.includes(taskId);
11+
}

0 commit comments

Comments
 (0)