Skip to content

Commit e31ef52

Browse files
authored
[Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled. (#94870)
* [Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled. * added docs * fixed test * added tests * fixed due to comments * Fixed docs due to comments * extended TM configuration changes message with the possible errors description
1 parent 8101419 commit e31ef52

File tree

8 files changed

+185
-53
lines changed

8 files changed

+185
-53
lines changed

docs/user/production-considerations/task-manager-troubleshooting.asciidoc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,3 +706,21 @@ These rough calculations give you a lower bound to the required throughput, whic
706706
Given these inferred attributes, it would be safe to assume that a single {kib} instance with default settings **would not** provide the required throughput. It is possible that scaling horizontally by adding a couple more {kib} instances will.
707707

708708
For details on scaling Task Manager, see <<task-manager-scaling-guidance>>.
709+
710+
[float]
711+
[[task-manager-cannot-operate-when-inline-scripts-are-disabled]]
712+
==== Inline scripts are disabled in {es}
713+
714+
*Problem*:
715+
716+
Tasks are not running, and the server logs contain the following error message:
717+
718+
[source, txt]
719+
--------------------------------------------------
720+
[warning][plugins][taskManager] Task Manager cannot operate when inline scripts are disabled in {es}
721+
--------------------------------------------------
722+
723+
*Solution*:
724+
725+
Inline scripts are a hard requirement for Task Manager to function.
726+
To enable inline scripting, see the Elasticsearch documentation for {ref}/modules-scripting-security.html#allowed-script-types-setting[configuring the allowed script types setting].

x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
*/
77

88
import sinon from 'sinon';
9-
import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks';
9+
import {
10+
elasticsearchServiceMock,
11+
savedObjectsRepositoryMock,
12+
} from '../../../../../src/core/server/mocks';
1013
import { SavedObjectsErrorHelpers, Logger } from '../../../../../src/core/server';
1114
import { ADJUST_THROUGHPUT_INTERVAL } from '../lib/create_managed_configuration';
1215
import { TaskManagerPlugin, TaskManagerStartContract } from '../plugin';
@@ -19,6 +22,7 @@ describe('managed configuration', () => {
1922

2023
let clock: sinon.SinonFakeTimers;
2124
const savedObjectsClient = savedObjectsRepositoryMock.create();
25+
const esStart = elasticsearchServiceMock.createStart();
2226

2327
beforeEach(async () => {
2428
jest.resetAllMocks();
@@ -55,6 +59,7 @@ describe('managed configuration', () => {
5559
});
5660

5761
const coreStart = coreMock.createStart();
62+
coreStart.elasticsearch = esStart;
5863
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
5964
taskManagerStart = await taskManager.start(coreStart);
6065

@@ -81,10 +86,10 @@ describe('managed configuration', () => {
8186
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
8287

8388
expect(logger.warn).toHaveBeenCalledWith(
84-
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
89+
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
8590
);
8691
expect(logger.debug).toHaveBeenCalledWith(
87-
'Max workers configuration changing from 10 to 8 after seeing 1 error(s)'
92+
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
8893
);
8994
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
9095
});
@@ -105,10 +110,57 @@ describe('managed configuration', () => {
105110
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
106111

107112
expect(logger.warn).toHaveBeenCalledWith(
108-
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
113+
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
109114
);
110115
expect(logger.debug).toHaveBeenCalledWith(
111-
'Poll interval configuration changing from 3000 to 3600 after seeing 1 error(s)'
116+
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
117+
);
118+
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
119+
});
120+
121+
test('should lower max workers when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
122+
esStart
123+
.createClient('taskManager')
124+
.asInternalUser.search.mockRejectedValueOnce(
125+
elasticsearchServiceMock.createErrorTransportRequestPromise(
126+
new Error('cannot execute [inline] scripts" error')
127+
)
128+
);
129+
130+
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
131+
`"cannot execute [inline] scripts" error"`
132+
);
133+
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
134+
135+
expect(logger.warn).toHaveBeenCalledWith(
136+
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
137+
);
138+
expect(logger.debug).toHaveBeenCalledWith(
139+
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
140+
);
141+
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
142+
});
143+
144+
test('should increase poll interval when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
145+
esStart
146+
.createClient('taskManager')
147+
.asInternalUser.search.mockRejectedValueOnce(
148+
elasticsearchServiceMock.createErrorTransportRequestPromise(
149+
new Error('cannot execute [inline] scripts" error')
150+
)
151+
);
152+
153+
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
154+
`"cannot execute [inline] scripts" error"`
155+
);
156+
157+
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
158+
159+
expect(logger.warn).toHaveBeenCalledWith(
160+
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
161+
);
162+
expect(logger.debug).toHaveBeenCalledWith(
163+
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
112164
);
113165
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
114166
});

x-pack/plugins/task_manager/server/lib/create_managed_configuration.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ describe('createManagedConfiguration()', () => {
9696
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
9797
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
9898
expect(logger.warn).toHaveBeenCalledWith(
99-
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
99+
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
100100
);
101101
});
102102

@@ -180,7 +180,7 @@ describe('createManagedConfiguration()', () => {
180180
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
181181
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
182182
expect(logger.warn).toHaveBeenCalledWith(
183-
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
183+
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
184184
);
185185
});
186186

x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { interval, merge, of, Observable } from 'rxjs';
99
import { filter, mergeScan, map, scan, distinctUntilChanged, startWith } from 'rxjs/operators';
1010
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
1111
import { Logger } from '../../../../../src/core/server';
12+
import { isEsCannotExecuteScriptError } from './identify_es_error';
1213

1314
const FLUSH_MARKER = Symbol('flush');
1415
export const ADJUST_THROUGHPUT_INTERVAL = 10 * 1000;
@@ -76,11 +77,11 @@ function createMaxWorkersScan(logger: Logger, startingMaxWorkers: number) {
7677
}
7778
if (newMaxWorkers !== previousMaxWorkers) {
7879
logger.debug(
79-
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} error(s)`
80+
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
8081
);
8182
if (previousMaxWorkers === startingMaxWorkers) {
8283
logger.warn(
83-
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" error(s).`
84+
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
8485
);
8586
}
8687
}
@@ -105,11 +106,11 @@ function createPollIntervalScan(logger: Logger, startingPollInterval: number) {
105106
}
106107
if (newPollInterval !== previousPollInterval) {
107108
logger.debug(
108-
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} error(s)`
109+
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
109110
);
110111
if (previousPollInterval === startingPollInterval) {
111112
logger.warn(
112-
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" error(s).`
113+
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
113114
);
114115
}
115116
}
@@ -121,7 +122,11 @@ function countErrors(errors$: Observable<Error>, countInterval: number): Observa
121122
return merge(
122123
// Flush error count at fixed interval
123124
interval(countInterval).pipe(map(() => FLUSH_MARKER)),
124-
errors$.pipe(filter((e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e)))
125+
errors$.pipe(
126+
filter(
127+
(e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e) || isEsCannotExecuteScriptError(e)
128+
)
129+
)
125130
).pipe(
126131
// When tag is "flush", reset the error counter
127132
// Otherwise increment the error counter

x-pack/plugins/task_manager/server/lib/identify_es_error.test.ts

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -137,33 +137,32 @@ function generateESErrorWithResponse(
137137
rootCause: ESErrorCausedBy[] = [],
138138
causeBy: ESErrorCausedBy = {}
139139
) {
140-
return Object.assign(new Error(), {
141-
msg: '[illegal_argument_exception] cannot execute [inline] scripts',
142-
path: '/.kibana_task_manager/_update_by_query',
143-
query: {},
144-
body: '{"query":{}}',
145-
statusCode: 400,
146-
response: JSON.stringify({
147-
error: {
148-
root_cause: rootCause,
149-
type: 'search_phase_execution_exception',
150-
reason: 'all shards failed',
151-
phase: 'query',
152-
grouped: true,
153-
failed_shards: [
154-
{
155-
shard: 0,
156-
index: '.kibana_task_manager_1',
157-
node: '24A4QbjHSK6prvtopAKLKw',
158-
reason: {
159-
type: 'illegal_argument_exception',
160-
reason: 'cannot execute [inline] scripts',
140+
return {
141+
name: 'ResponseError',
142+
meta: {
143+
body: {
144+
error: {
145+
root_cause: rootCause,
146+
type: 'search_phase_execution_exception',
147+
reason: 'all shards failed',
148+
phase: 'query',
149+
grouped: true,
150+
failed_shards: [
151+
{
152+
shard: 0,
153+
index: '.kibana_task_manager_8.0.0_001',
154+
node: 'GJ7ekIWTT56-h-aC6Y89Gw',
155+
reason: {
156+
type: 'illegal_argument_exception',
157+
reason: 'cannot execute [inline] scripts',
158+
},
161159
},
162-
},
163-
],
164-
caused_by: causeBy,
160+
],
161+
caused_by: causeBy,
162+
},
163+
status: 400,
165164
},
166-
status: 400,
167-
}),
168-
});
165+
statusCode: 400,
166+
},
167+
};
169168
}

x-pack/plugins/task_manager/server/lib/identify_es_error.ts

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,27 @@ export interface ESError {
1616
caused_by?: ESErrorCausedBy;
1717
}
1818

19+
export interface ESErrorBody {
20+
error?: ESError;
21+
status?: number;
22+
}
23+
24+
export interface ESErrorMeta {
25+
body?: ESErrorBody;
26+
statusCode?: number;
27+
}
28+
export interface ElasticsearchResponseError {
29+
name?: string;
30+
meta?: ESErrorMeta;
31+
}
32+
1933
function extractCausedByChain(
2034
causedBy: ESErrorCausedBy = {},
2135
accumulator: string[] = []
2236
): string[] {
2337
const { reason, caused_by: innerCausedBy } = causedBy;
2438

25-
if (reason) {
39+
if (reason && !accumulator.includes(reason)) {
2640
accumulator.push(reason);
2741
}
2842

@@ -39,11 +53,15 @@ function extractCausedByChain(
3953
* @param err Object Error thrown by ES JS client
4054
* @return ES error cause
4155
*/
42-
export function identifyEsError(err: { response: string }) {
43-
const { response } = err;
44-
56+
export function identifyEsError(err: ElasticsearchResponseError) {
57+
if (!err.meta) {
58+
return [];
59+
}
60+
const {
61+
meta: { body: response },
62+
} = err;
4563
if (response) {
46-
const { error } = JSON.parse(response) as { error?: ESError };
64+
const { error } = response;
4765
if (error) {
4866
const { root_cause: rootCause = [], caused_by: causedBy } = error;
4967

@@ -58,3 +76,7 @@ export function identifyEsError(err: { response: string }) {
5876
}
5977
return [];
6078
}
79+
80+
export function isEsCannotExecuteScriptError(err: ElasticsearchResponseError): boolean {
81+
return identifyEsError(err).includes('cannot execute [inline] scripts');
82+
}

x-pack/plugins/task_manager/server/polling_lifecycle.test.ts

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import { TaskClaiming, ClaimOwnershipResult } from './queries/task_claiming';
1919
import type { TaskClaiming as TaskClaimingClass } from './queries/task_claiming';
2020
import { asOk, Err, isErr, isOk, Result } from './lib/result_type';
2121
import { FillPoolResult } from './lib/fill_pool';
22+
import { ElasticsearchResponseError } from './lib/identify_es_error';
2223

2324
let mockTaskClaiming = taskClaimingMock.create({});
2425
jest.mock('./queries/task_claiming', () => {
@@ -204,12 +205,46 @@ describe('TaskPollingLifecycle', () => {
204205
taskClaiming.claimAvailableTasksIfCapacityIsAvailable.mockImplementation(
205206
() =>
206207
new Observable<Result<ClaimOwnershipResult, FillPoolResult>>((observer) => {
207-
observer.error(
208-
Object.assign(new Error(), {
209-
response:
210-
'{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":".kibana_task_manager_1","node":"24A4QbjHSK6prvtopAKLKw","reason":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}],"caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts","caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}},"status":400}',
211-
})
212-
);
208+
observer.error({
209+
name: 'ResponseError',
210+
meta: {
211+
body: {
212+
error: {
213+
root_cause: [
214+
{
215+
type: 'illegal_argument_exception',
216+
reason: 'cannot execute [inline] scripts',
217+
},
218+
],
219+
type: 'search_phase_execution_exception',
220+
reason: 'all shards failed',
221+
phase: 'query',
222+
grouped: true,
223+
failed_shards: [
224+
{
225+
shard: 0,
226+
index: '.kibana_task_manager_1',
227+
node: '24A4QbjHSK6prvtopAKLKw',
228+
reason: {
229+
type: 'illegal_argument_exception',
230+
reason: 'cannot execute [inline] scripts',
231+
},
232+
},
233+
],
234+
caused_by: {
235+
type: 'illegal_argument_exception',
236+
reason: 'cannot execute [inline] scripts',
237+
caused_by: {
238+
type: 'illegal_argument_exception',
239+
reason: 'cannot execute [inline] scripts',
240+
},
241+
},
242+
},
243+
status: 400,
244+
},
245+
},
246+
statusCode: 400,
247+
} as ElasticsearchResponseError);
213248
})
214249
);
215250

x-pack/plugins/task_manager/server/polling_lifecycle.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import {
3939
import { TaskPool } from './task_pool';
4040
import { TaskManagerRunner, TaskRunner } from './task_running';
4141
import { TaskStore } from './task_store';
42-
import { identifyEsError } from './lib/identify_es_error';
42+
import { identifyEsError, isEsCannotExecuteScriptError } from './lib/identify_es_error';
4343
import { BufferedTaskStore } from './buffered_task_store';
4444
import { TaskTypeDictionary } from './task_type_dictionary';
4545
import { delayOnClaimConflicts } from './polling';
@@ -299,15 +299,16 @@ export function claimAvailableTasks(
299299
// we can identify the reason
300300
// if we can - we emit a FillPoolResult error rather than erroring out the wrapping Observable
301301
// returned by `claimAvailableTasks`
302-
if (identifyEsError(ex).includes('cannot execute [inline] scripts')) {
302+
if (isEsCannotExecuteScriptError(ex)) {
303303
logger.warn(
304304
`Task Manager cannot operate when inline scripts are disabled in Elasticsearch`
305305
);
306306
observer.next(asErr(FillPoolResult.Failed));
307307
observer.complete();
308308
} else {
309+
const esError = identifyEsError(ex);
309310
// as we couldn't identify the reason - we'll error out the wrapping Observable too
310-
observer.error(ex);
311+
observer.error(esError.length > 0 ? esError : ex);
311312
}
312313
},
313314
() => {

0 commit comments

Comments
 (0)