chore: flush after serializing big string #5401

Merged
merged 4 commits into from Jul 17, 2025

8 changes: 8 additions & 0 deletions src/server/rdb_save.cc
@@ -309,6 +309,10 @@ io::Result<uint8_t> RdbSerializer::SaveEntry(const PrimeKey& pk, const PrimeValu
return make_unexpected(ec);
}

// We flush here because, if the next element in the bucket we are serializing is a container,
// it will first serialize its first entry and only then flush the internal buffer, even if
// the buffer has already crossed the limit.
FlushIfNeeded(FlushState::kFlushEndEntry);
return rdb_type;
}

@@ -776,6 +780,10 @@ error_code SerializerBase::FlushToSink(io::Sink* sink, SerializerBase::FlushStat
if (bytes.empty())
return error_code{};

if (bytes.size() > serialization_peak_bytes_) {
serialization_peak_bytes_ = bytes.size();
}

DVLOG(2) << "FlushToSink " << bytes.size() << " bytes";

// interrupt point.
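The two hunks above do the work of this PR: SaveEntry now ends with a flush check so a big string is handed to the sink as soon as it pushes the buffer over the chunk limit, and FlushToSink records the largest chunk it ever wrote. Below is a minimal standalone sketch of that interaction, under stated assumptions: ToySerializer, Sink and kChunkLimit are illustrative stand-ins (kChunkLimit plays the role of serialization_max_chunk_size, Sink::peak mirrors serialization_peak_bytes_), not the real Dragonfly classes.

```cpp
// Toy model of the end-of-entry flush: without it, the big string would sit in the
// buffer until the *next* entry's flush point, inflating the peak chunk size.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>

constexpr size_t kChunkLimit = 64;  // stand-in for serialization_max_chunk_size

struct Sink {
  size_t peak = 0;  // largest single chunk written, mirrors serialization_peak_bytes_
  void Write(const std::string& bytes) { peak = std::max(peak, bytes.size()); }
};

class ToySerializer {
 public:
  explicit ToySerializer(Sink* sink) : sink_(sink) {}

  void SaveEntry(const std::string& payload) {
    buf_ += payload;
    FlushIfNeeded();  // the new call: flush as soon as this entry crossed the limit
  }

 private:
  void FlushIfNeeded() {
    if (buf_.size() >= kChunkLimit)
      Flush();
  }

  void Flush() {
    if (buf_.empty())
      return;
    sink_->Write(buf_);
    buf_.clear();
  }

  Sink* sink_;
  std::string buf_;
};

int main() {
  Sink sink;
  ToySerializer ser(&sink);
  ser.SaveEntry(std::string(500, 'x'));  // big string: flushed right after the entry
  ser.SaveEntry("small");                // next entry starts on an empty buffer
  std::cout << "peak flushed chunk: " << sink.peak << " bytes\n";  // 500, not 505
}
```

In this toy run the peak is the big string alone (500 bytes); deferring the flush to a later point would have let the following entry pile on top of it first, which is exactly what the test at the bottom of this PR checks via the logged peak value.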
6 changes: 6 additions & 0 deletions src/server/rdb_save.h
@@ -183,6 +183,10 @@ class SerializerBase {
return SaveString(io::View(io::Bytes{buf, len}));
}

uint64_t GetSerializationPeakBytes() const {
return serialization_peak_bytes_;
}

protected:
// Prepare internal buffer for flush. Compress it.
io::Bytes PrepareFlush(FlushState flush_state);
@@ -210,6 +214,8 @@ class SerializerBase {
base::PODArray<uint8_t> tmp_buf_;
std::unique_ptr<LZF_HSLOT[]> lzf_;
size_t number_of_chunks_ = 0;

uint64_t serialization_peak_bytes_ = 0;
};

class RdbSerializer : public SerializerBase {
3 changes: 2 additions & 1 deletion src/server/snapshot.cc
@@ -64,7 +64,7 @@ size_t SliceSnapshot::GetThreadLocalMemoryUsage() {
}

bool SliceSnapshot::IsSnaphotInProgress() {
return tl_slice_snapshots.size() > 0;
return !tl_slice_snapshots.empty();
Contributor Author:
I could not resist, I will refactor all those cases eventually

}

void SliceSnapshot::Start(bool stream_journal, SnapshotFlush allow_flush) {
@@ -114,6 +114,7 @@ void SliceSnapshot::Start(bool stream_journal, SnapshotFlush allow_flush) {
db_slice_->UnregisterOnMoved(moved_cb_id_);
}
consumer_->Finalize();
VLOG(1) << "Serialization peak bytes: " << serializer_->GetSerializationPeakBytes();
});
}

53 changes: 53 additions & 0 deletions tests/dragonfly/replication_test.py
@@ -3387,3 +3387,56 @@ async def get_keys():
moved_saved = extract_int_after_prefix("moved_saved ", line)
logging.debug(f"Moved saves {moved_saved}")
assert moved_saved > 0


@dfly_args({"proactor_threads": 1})
async def test_big_strings(df_factory):
master = df_factory.create(
proactor_threads=1, serialization_max_chunk_size=1, vmodule="snapshot=1"
)
replica = df_factory.create(proactor_threads=1)

df_factory.start_all([master, replica])
c_master = master.client()
c_replica = replica.client()

# 500 KB
value_size = 500_000

async def get_memory(client, field):
info = await client.info("memory")
return info[field]

capacity = await get_memory(c_master, "prime_capacity")

seeder = DebugPopulateSeeder(
key_target=int(capacity * 0.8),
data_size=value_size,
collection_size=1,
variance=1,
samples=1,
types=["STRING"],
)
await seeder.run(c_master)

# sanity
capacity = await get_memory(c_master, "prime_capacity")
assert capacity < 8000

await c_replica.execute_command(f"REPLICAOF localhost {master.port}")
await wait_for_replicas_state(c_replica)

# Check if replica data is consistent
replica_data = await DebugPopulateSeeder.capture(c_replica)
master_data = await DebugPopulateSeeder.capture(c_master)
assert master_data == replica_data

replica.stop()
master.stop()

lines = master.find_in_logs("Serialization peak bytes: ")
assert len(lines) == 1
# We test the serialization path of command execution
line = lines[0]
peak_bytes = extract_int_after_prefix("Serialization peak bytes: ", line)
assert peak_bytes < value_size