Skip to content

Commit 393505a

Browse files
authored
Migrationsv2: limit batch sizes to migrations.maxBatchSizeBytes (= 100mb by default) (#109540)
* Fix logging for existing integration test
* First stab at limiting batches to batchSizeBytes
* Fix tests
* Fix batch size calculation, NDJSON needs to be terminated by an empty line
* Integration tests
* Fix type failures
* rename migration integration tests and log files to be consistent & more descriptive
* Review feedback
* Remove duplication of fatal error reasons
* migrations.maxBatchSizeBytes to docker environment vars
* docs for migrations.maxBatchSizeBytes
1 parent 249c5fb commit 393505a

32 files changed

+764
-103
lines changed

docs/setup/settings.asciidoc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,10 @@ override this parameter to use their own Tile Map Service. For example:
406406
`"https://tiles.elastic.co/v2/default/{z}/{x}/{y}.png?elastic_tile_service_tos=agree&my_app_name=kibana"`
407407

408408
| `migrations.batchSize:`
409-
| Defines the number of documents migrated at a time. The higher the value, the faster the Saved Objects migration process performs at the cost of higher memory consumption. If the migration fails due to a `circuit_breaking_exception`, set a smaller `batchSize` value. *Default: `1000`*
409+
| Defines the number of documents migrated at a time. The higher the value, the faster the Saved Objects migration process performs at the cost of higher memory consumption. If upgrade migrations result in {kib} crashing with an out-of-memory exception or fail due to an Elasticsearch `circuit_breaking_exception`, use a smaller `batchSize` value to reduce the memory pressure. *Default: `1000`*
410+
411+
| `migrations.maxBatchSizeBytes:`
412+
| Defines the maximum payload size for indexing batches of upgraded saved objects to avoid migrations failing due to a 413 Request Entity Too Large response from Elasticsearch. This value should be lower than or equal to your Elasticsearch cluster's `http.max_content_length` configuration option. *Default: `100mb`*
410413

411414
| `migrations.enableV2:`
412415
| experimental[]. Enables the new Saved Objects migration algorithm. For information about the migration algorithm, refer to <<upgrade-migrations>>. When `migrations v2` is stable, the setting will be removed in an upcoming release without any further notice. Setting the value to `false` causes {kib} to use the legacy migration algorithm, which shipped in 7.11 and earlier versions. *Default: `true`*

src/core/server/saved_objects/migrations/kibana/kibana_migrator.mock.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { buildActiveMappings } from '../core';
1111
const { mergeTypes } = jest.requireActual('./kibana_migrator');
1212
import { SavedObjectsType } from '../../types';
1313
import { BehaviorSubject } from 'rxjs';
14+
import { ByteSizeValue } from '@kbn/config-schema';
1415

1516
const defaultSavedObjectTypes: SavedObjectsType[] = [
1617
{
@@ -37,6 +38,7 @@ const createMigrator = (
3738
kibanaVersion: '8.0.0-testing',
3839
soMigrationsConfig: {
3940
batchSize: 100,
41+
maxBatchSizeBytes: ByteSizeValue.parse('30kb'),
4042
scrollDuration: '15m',
4143
pollInterval: 1500,
4244
skip: false,

src/core/server/saved_objects/migrations/kibana/kibana_migrator.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { loggingSystemMock } from '../../../logging/logging_system.mock';
1515
import { SavedObjectTypeRegistry } from '../../saved_objects_type_registry';
1616
import { SavedObjectsType } from '../../types';
1717
import { DocumentMigrator } from '../core/document_migrator';
18+
import { ByteSizeValue } from '@kbn/config-schema';
1819
jest.mock('../core/document_migrator', () => {
1920
return {
2021
// Create a mock for spying on the constructor
@@ -396,6 +397,7 @@ const mockOptions = ({ enableV2 }: { enableV2: boolean } = { enableV2: false })
396397
} as KibanaMigratorOptions['kibanaConfig'],
397398
soMigrationsConfig: {
398399
batchSize: 20,
400+
maxBatchSizeBytes: ByteSizeValue.parse('20mb'),
399401
pollInterval: 20000,
400402
scrollDuration: '10m',
401403
skip: false,

src/core/server/saved_objects/migrationsv2/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,10 @@ completed this step:
316316
- temp index has a write block
317317
- temp index is not found
318318
### New control state
319+
1. If `currentBatch` is the last batch in `transformedDocBatches`
319320
`REINDEX_SOURCE_TO_TEMP_READ`
321+
2. If there are more batches left in `transformedDocBatches`
322+
`REINDEX_SOURCE_TO_TEMP_INDEX_BULK`
320323

321324
## REINDEX_SOURCE_TO_TEMP_CLOSE_PIT
322325
### Next action

src/core/server/saved_objects/migrationsv2/actions/bulk_overwrite_transformed_documents.ts

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@ import type {
2323
IndexNotFound,
2424
} from './index';
2525

26+
/**
27+
* Given a document and index, creates the pair of Bulk API body entries for that document: the `index` action metadata followed by the document's `_source`.
28+
*/
29+
export const createBulkOperationBody = (doc: SavedObjectsRawDoc, index: string) => {
30+
return [
31+
{
32+
index: {
33+
_index: index,
34+
_id: doc._id,
35+
// overwrite existing documents
36+
op_type: 'index',
37+
// use optimistic concurrency control to ensure that outdated
38+
// documents are only overwritten once with the latest version
39+
if_seq_no: doc._seq_no,
40+
if_primary_term: doc._primary_term,
41+
},
42+
},
43+
doc._source,
44+
];
45+
};
46+
2647
/** @internal */
2748
export interface BulkOverwriteTransformedDocumentsParams {
2849
client: ElasticsearchClient;
@@ -47,6 +68,10 @@ export const bulkOverwriteTransformedDocuments = ({
4768
| RequestEntityTooLargeException,
4869
'bulk_index_succeeded'
4970
> => () => {
71+
const body = transformedDocs.flatMap((doc) => {
72+
return createBulkOperationBody(doc, index);
73+
});
74+
5075
return client
5176
.bulk({
5277
// Because we only add aliases in the MARK_VERSION_INDEX_READY step we
@@ -60,23 +85,7 @@ export const bulkOverwriteTransformedDocuments = ({
6085
wait_for_active_shards: WAIT_FOR_ALL_SHARDS_TO_BE_ACTIVE,
6186
refresh,
6287
filter_path: ['items.*.error'],
63-
body: transformedDocs.flatMap((doc) => {
64-
return [
65-
{
66-
index: {
67-
_index: index,
68-
_id: doc._id,
69-
// overwrite existing documents
70-
op_type: 'index',
71-
// use optimistic concurrency control to ensure that outdated
72-
// documents are only overwritten once with the latest version
73-
if_seq_no: doc._seq_no,
74-
if_primary_term: doc._primary_term,
75-
},
76-
},
77-
doc._source,
78-
];
79-
}),
88+
body,
8089
})
8190
.then((res) => {
8291
// Filter out version_conflict_engine_exception since these just mean

src/core/server/saved_objects/migrationsv2/initial_state.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* Side Public License, v 1.
77
*/
88

9+
import { ByteSizeValue } from '@kbn/config-schema';
910
import * as Option from 'fp-ts/Option';
1011
import { SavedObjectsMigrationConfigType } from '../saved_objects_config';
1112
import { SavedObjectTypeRegistry } from '../saved_objects_type_registry';
@@ -21,6 +22,7 @@ describe('createInitialState', () => {
2122
const migrationsConfig = ({
2223
retryAttempts: 15,
2324
batchSize: 1000,
25+
maxBatchSizeBytes: ByteSizeValue.parse('100mb'),
2426
} as unknown) as SavedObjectsMigrationConfigType;
2527
it('creates the initial state for the model based on the passed in parameters', () => {
2628
expect(
@@ -37,6 +39,7 @@ describe('createInitialState', () => {
3739
})
3840
).toEqual({
3941
batchSize: 1000,
42+
maxBatchSizeBytes: ByteSizeValue.parse('100mb').getValueInBytes(),
4043
controlState: 'INIT',
4144
currentAlias: '.kibana_task_manager',
4245
excludeFromUpgradeFilterHooks: {},

src/core/server/saved_objects/migrationsv2/initial_state.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export const createInitialState = ({
8282
retryDelay: 0,
8383
retryAttempts: migrationsConfig.retryAttempts,
8484
batchSize: migrationsConfig.batchSize,
85+
maxBatchSizeBytes: migrationsConfig.maxBatchSizeBytes.getValueInBytes(),
8586
logs: [],
8687
unusedTypesQuery: excludeUnusedTypesQuery,
8788
knownTypes,
Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import { InternalCoreStart } from '../../../internal_types';
1717
import { Root } from '../../../root';
1818

1919
const kibanaVersion = Env.createDefault(REPO_ROOT, getEnvOptions()).packageInfo.version;
20-
const logFilePath = path.join(__dirname, 'migration_test_kibana.log');
20+
const logFilePath = path.join(__dirname, '7.7.2_xpack_100k.log');
2121

2222
async function removeLogFile() {
2323
// ignore errors if it doesn't exist
@@ -61,9 +61,12 @@ describe('migration from 7.7.2-xpack with 100k objects', () => {
6161
},
6262
},
6363
},
64-
root: {
65-
appenders: ['default', 'file'],
66-
},
64+
loggers: [
65+
{
66+
name: 'root',
67+
appenders: ['file'],
68+
},
69+
],
6770
},
6871
},
6972
{
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import * as kbnTestServer from '../../../../test_helpers/kbn_server';
1212
import { Root } from '../../../root';
1313
import { ElasticsearchClient } from '../../../elasticsearch';
1414

15-
const logFilePath = Path.join(__dirname, '7_13_failed_action_tasks_test.log');
15+
const logFilePath = Path.join(__dirname, '7_13_failed_action_tasks.log');
1616

1717
async function removeLogFile() {
1818
// ignore errors if it doesn't exist
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import Util from 'util';
1212
import * as kbnTestServer from '../../../../test_helpers/kbn_server';
1313
import { Root } from '../../../root';
1414

15-
const logFilePath = Path.join(__dirname, '7_13_corrupt_transform_failures_test.log');
15+
const logFilePath = Path.join(__dirname, '7_13_corrupt_transform_failures.log');
1616

1717
const asyncUnlink = Util.promisify(Fs.unlink);
1818

0 commit comments

Comments
 (0)