Skip to content

Commit 09f8a2f

Browse files
authored
Start AOFRW before streaming repl buffer during fullsync (redis#13758)
During fullsync, before loading RDB on the replica, we stop aof child to prevent copy-on-write disaster. Once rdb is loaded, aof is started again and it will trigger aof rewrite. With redis#13732 , for rdbchannel replication, this behavior was changed. Currently, we start aof after replication buffer is streamed to db. This PR changes it back to start aof just after rdb is loaded (before repl buffer is streamed) Both approaches may have pros and cons. If we start aof before streaming repl buffers, we may still face with copy-on-write issues as repl buffers potentially include large amount of changes. If we wait until replication buffer drained, it means we are delaying starting aof persistence. Additional changes are introduced as part of this PR: - Interface change: Added `mem_replica_full_sync_buffer` field to the `INFO MEMORY` command reply. During full sync, it shows total memory consumed by accumulated replication stream buffer on replica. Added same metric to `MEMORY STATS` command reply as `replica.fullsync.buffer` field. - Fixes: - Count repl stream buffer size of replica as part of 'memory overhead' calculation for fields in "INFO MEMORY" and "MEMORY STATS" outputs. Before this PR, repl buffer was not counted as part of memory overhead calculation, causing misreports for fields like `used_memory_overhead` and `used_memory_dataset` in "INFO STATS" and for `overhead.total` field in "MEMORY STATS" command reply. - Dismiss replication stream buffers memory of replica in the fork to reduce COW impact during a fork. - Fixed a few time sensitive flaky tests, deleted a noop statement, fixed some comments and fail messages in rdbchannel tests.
1 parent 870b6bd commit 09f8a2f

File tree

6 files changed

+52
-18
lines changed

6 files changed

+52
-18
lines changed

src/commands/memory-stats.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
"replication.backlog": {
3030
"type": "integer"
3131
},
32+
"replica.fullsync.buffer": {
33+
"type": "integer"
34+
},
3235
"clients.slaves": {
3336
"type": "integer"
3437
},

src/object.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,9 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
12091209
server.repl_backlog->blocks_index->numnodes * sizeof(raxNode) +
12101210
raxSize(server.repl_backlog->blocks_index) * sizeof(void*);
12111211
}
1212+
1213+
mh->replica_fullsync_buffer = server.repl_full_sync_buffer.mem_used;
1214+
mem_total += mh->replica_fullsync_buffer;
12121215
mem_total += mh->repl_backlog;
12131216
mem_total += mh->clients_slaves;
12141217

@@ -1560,7 +1563,7 @@ NULL
15601563
} else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
15611564
struct redisMemOverhead *mh = getMemoryOverheadData();
15621565

1563-
addReplyMapLen(c,32+mh->num_dbs);
1566+
addReplyMapLen(c,33+mh->num_dbs);
15641567

15651568
addReplyBulkCString(c,"peak.allocated");
15661569
addReplyLongLong(c,mh->peak_allocated);
@@ -1574,6 +1577,9 @@ NULL
15741577
addReplyBulkCString(c,"replication.backlog");
15751578
addReplyLongLong(c,mh->repl_backlog);
15761579

1580+
addReplyBulkCString(c,"replica.fullsync.buffer");
1581+
addReplyLongLong(c,mh->replica_fullsync_buffer);
1582+
15771583
addReplyBulkCString(c,"clients.slaves");
15781584
addReplyLongLong(c,mh->clients_slaves);
15791585

src/replication.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,6 +2444,14 @@ void readSyncBulkPayload(connection *conn) {
24442444
/* Send the initial ACK immediately to put this replica in online state. */
24452445
if (usemark) replicationSendAck();
24462446

2447+
/* Restart the AOF subsystem now that we finished the sync. This
2448+
* will trigger an AOF rewrite, and when done will start appending
2449+
* to the new file. */
2450+
if (server.aof_enabled) {
2451+
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Starting AOF after a successful sync");
2452+
startAppendOnlyWithRetry();
2453+
}
2454+
24472455
if (rdbchannel) {
24482456
int close_asap;
24492457

@@ -2465,13 +2473,6 @@ void readSyncBulkPayload(connection *conn) {
24652473
freeClientAsync(server.master);
24662474
}
24672475

2468-
/* Restart the AOF subsystem now that we finished the sync. This
2469-
* will trigger an AOF rewrite, and when done will start appending
2470-
* to the new file. */
2471-
if (server.aof_enabled) {
2472-
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Starting AOF after a successful sync");
2473-
startAppendOnlyWithRetry();
2474-
}
24752476
return;
24762477

24772478
error:
@@ -3671,6 +3672,7 @@ static void rdbChannelReplDataBufInit(void) {
36713672
serverAssert(server.repl_full_sync_buffer.blocks == NULL);
36723673
server.repl_full_sync_buffer.size = 0;
36733674
server.repl_full_sync_buffer.used = 0;
3675+
server.repl_full_sync_buffer.mem_used = 0;
36743676
server.repl_full_sync_buffer.blocks = listCreate();
36753677
server.repl_full_sync_buffer.blocks->free = zfree;
36763678
}
@@ -3682,6 +3684,7 @@ static void rdbChannelReplDataBufFree(void) {
36823684
server.repl_full_sync_buffer.blocks = NULL;
36833685
server.repl_full_sync_buffer.size = 0;
36843686
server.repl_full_sync_buffer.used = 0;
3687+
server.repl_full_sync_buffer.mem_used = 0;
36853688
}
36863689

36873690
/* Replication: Replica side.
@@ -3752,6 +3755,7 @@ void rdbChannelBufferReplData(connection *conn) {
37523755

37533756
listAddNodeTail(server.repl_full_sync_buffer.blocks, tail);
37543757
server.repl_full_sync_buffer.size += tail->size;
3758+
server.repl_full_sync_buffer.mem_used += usable_size + sizeof(listNode);
37553759

37563760
/* Update buffer's peak */
37573761
if (server.repl_full_sync_buffer.peak < server.repl_full_sync_buffer.size)
@@ -3791,7 +3795,8 @@ int rdbChannelStreamReplDataToDb(client *c) {
37913795

37923796
server.repl_full_sync_buffer.used -= used;
37933797
server.repl_full_sync_buffer.size -= size;
3794-
3798+
server.repl_full_sync_buffer.mem_used -= (size + sizeof(listNode) +
3799+
sizeof(replDataBufBlock));
37953800
if (server.repl_debug_pause & REPL_DEBUG_ON_STREAMING_REPL_BUF)
37963801
debugPauseProcess();
37973802

src/server.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5818,6 +5818,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
58185818
"mem_not_counted_for_evict:%zu\r\n", freeMemoryGetNotCountedMemory(),
58195819
"mem_replication_backlog:%zu\r\n", mh->repl_backlog,
58205820
"mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem,
5821+
"mem_replica_full_sync_buffer:%zu\r\n", server.repl_full_sync_buffer.mem_used,
58215822
"mem_clients_slaves:%zu\r\n", mh->clients_slaves,
58225823
"mem_clients_normal:%zu\r\n", mh->clients_normal,
58235824
"mem_cluster_links:%zu\r\n", mh->cluster_links,
@@ -6793,6 +6794,15 @@ void dismissMemoryInChild(void) {
67936794
dismissMemory(o, o->size);
67946795
}
67956796

6797+
/* Dismiss accumulated repl buffer on replica. */
6798+
if (server.repl_full_sync_buffer.blocks) {
6799+
listRewind(server.repl_full_sync_buffer.blocks, &li);
6800+
while((ln = listNext(&li))) {
6801+
replDataBufBlock *o = listNodeValue(ln);
6802+
dismissMemory(o, o->size);
6803+
}
6804+
}
6805+
67966806
/* Dismiss all clients memory. */
67976807
listRewind(server.clients, &li);
67986808
while((ln = listNext(&li))) {

src/server.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,7 @@ typedef struct replDataBufBlock {
12001200
* rdb channel replication on replica side. */
12011201
typedef struct replDataBuf {
12021202
list *blocks; /* List of replDataBufBlock */
1203+
size_t mem_used; /* Total allocated memory */
12031204
size_t size; /* Total number of bytes available in all blocks. */
12041205
size_t used; /* Total number of bytes actually used in all blocks. */
12051206
size_t peak; /* Peak number of bytes stored in all blocks. */
@@ -1498,6 +1499,7 @@ struct redisMemOverhead {
14981499
size_t total_allocated;
14991500
size_t startup_allocated;
15001501
size_t repl_backlog;
1502+
size_t replica_fullsync_buffer;
15011503
size_t clients_slaves;
15021504
size_t clients_normal;
15031505
size_t cluster_links;

tests/integration/replication-rdbchannel.tcl

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ start_server {tags {"repl external:skip"}} {
7979
$replica1 config set repl-rdb-channel yes
8080
$replica2 config set repl-rdb-channel no
8181

82+
set loglines [count_log_lines 0]
8283
set prev_forks [s 0 total_forks]
8384
$master set x 2
8485

@@ -87,9 +88,10 @@ start_server {tags {"repl external:skip"}} {
8788
$replica1 replicaof $master_host $master_port
8889
$replica2 replicaof $master_host $master_port
8990

90-
set res [wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets (rdb-channel)*"} 0 2000 10]
91-
set loglines [lindex $res 1]
92-
wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets*"} $loglines 2000 10
91+
# There will be two forks subsequently, one for rdbchannel
92+
# replica, another for the replica without rdbchannel config.
93+
wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets (rdb-channel)*"} $loglines 300 100
94+
wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets"} $loglines 300 100
9395

9496
wait_replica_online $master 0 100 100
9597
wait_replica_online $master 1 100 100
@@ -396,10 +398,10 @@ start_server {tags {"repl external:skip"}} {
396398
populate 20000 master 100 -1
397399

398400
$replica replicaof $master_host $master_port
399-
wait_for_condition 50 200 {
401+
wait_for_condition 100 200 {
400402
[s 0 loading] == 1
401403
} else {
402-
fail "[s 0 loading] sdsdad"
404+
fail "Replica did not start loading"
403405
}
404406

405407
# Generate some traffic for backlog ~2mb
@@ -465,12 +467,11 @@ start_server {tags {"repl external:skip"}} {
465467
fail "Sync did not start"
466468
}
467469

468-
# Wait for both replicas main conns to establish psync
470+
# Verify replicas are connected
469471
wait_for_condition 500 100 {
470472
[s -2 connected_slaves] == 2
471473
} else {
472-
fail "Replicas didn't establish psync:
473-
sync_partial_ok: [s -2 sync_partial_ok]"
474+
fail "Replicas didn't connect: [s -2 connected_slaves]"
474475
}
475476

476477
# kill one of the replicas
@@ -488,6 +489,14 @@ start_server {tags {"repl external:skip"}} {
488489
sync_full:[s -2 sync_full]
489490
connected_slaves: [s -2 connected_slaves]"
490491
}
492+
493+
# Wait until replica catches up
494+
wait_replica_online $master 0 200 100
495+
wait_for_condition 200 100 {
496+
[s 0 mem_replica_full_sync_buffer] == 0
497+
} else {
498+
fail "Replica did not consume buffer in time"
499+
}
491500
}
492501

493502
test "Test master aborts rdb delivery if all replicas are dropped" {
@@ -773,7 +782,6 @@ start_server {tags {"repl external:skip"}} {
773782

774783
# Speed up loading
775784
$replica config set key-load-delay 0
776-
stop_write_load $load_handle
777785

778786
# Wait until replica recovers and becomes online
779787
wait_replica_online $master 0 100 100

0 commit comments

Comments
 (0)