Skip to content

Commit

Permalink
Skip searching through lsm tree for a target level when files overlap (
Browse files Browse the repository at this point in the history
…#12284)

Summary:
While ingesting multiple external files with key range overlap, current flow go through the lsm tree to do a search for a target level and later discard that result by defaulting back to L0. This PR improves this by just skip the search altogether.

The other change is to remove default to L0 for the combination of universal compaction + force global sequence number, which was initially added to meet a pre #7421  invariant.

Pull Request resolved: #12284

Test Plan:
Added unit test:
./external_sst_file_test --gtest_filter="*IngestFileWithGlobalSeqnoAssignedUniversal*"

Reviewed By: ajkr

Differential Revision: D53072238

Pulled By: jowlyzhang

fbshipit-source-id: 30943e2e284a7f23b495c0ea4c80cb166a34a8ac
  • Loading branch information
jowlyzhang authored and facebook-github-bot committed Jan 25, 2024
1 parent d82d179 commit 928aca8
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 10 deletions.
14 changes: 4 additions & 10 deletions db/external_sst_file_ingestion_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -886,16 +886,16 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
SequenceNumber* assigned_seqno) {
Status status;
*assigned_seqno = 0;
if (force_global_seqno) {
if (force_global_seqno || files_overlap_) {
*assigned_seqno = last_seqno + 1;
if (compaction_style == kCompactionStyleUniversal || files_overlap_) {
// If files overlap, we have to ingest them at level 0.
if (files_overlap_) {
file_to_ingest->picked_level = 0;
if (ingestion_options_.fail_if_not_bottommost_level) {
status = Status::TryAgain(
"Files cannot be ingested to Lmax. Please make sure key range of "
"Lmax does not overlap with files to ingest.");
return status;
}
file_to_ingest->picked_level = 0;
return status;
}
}
Expand Down Expand Up @@ -947,12 +947,6 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
target_level = lvl;
}
}
// If files overlap, we have to ingest them at level 0 and assign the newest
// sequence number
if (files_overlap_) {
target_level = 0;
*assigned_seqno = last_seqno + 1;
}

if (ingestion_options_.fail_if_not_bottommost_level &&
target_level < cfd_->NumberLevels() - 1) {
Expand Down
86 changes: 86 additions & 0 deletions db/external_sst_file_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,92 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
VerifyDBFromMap(true_data, &kcnt, false);
}

TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedUniversal) {
bool write_global_seqno = std::get<0>(GetParam());
bool verify_checksums_before_ingest = std::get<1>(GetParam());
Options options = CurrentOptions();
options.num_levels = 5;
options.compaction_style = kCompactionStyleUniversal;
options.disable_auto_compactions = true;
DestroyAndReopen(options);
std::vector<std::pair<std::string, std::string>> file_data;
std::map<std::string, std::string> true_data;

// Write 200 -> 250 into the bottommost level
for (int i = 200; i <= 250; i++) {
ASSERT_OK(Put(Key(i), "bottommost"));
true_data[Key(i)] = "bottommost";
}
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
ASSERT_EQ("0,0,0,0,1", FilesPerLevel());

// Take a snapshot to enforce global sequence number.
const Snapshot* snap = db_->GetSnapshot();

// Insert 100 -> 200 into the memtable
for (int i = 100; i <= 200; i++) {
ASSERT_OK(Put(Key(i), "memtable"));
true_data[Key(i)] = "memtable";
}

// Insert 0 -> 20 using AddFile
file_data.clear();
for (int i = 0; i <= 20; i++) {
file_data.emplace_back(Key(i), "L4");
}

ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, false, &true_data));

// This file don't overlap with anything in the DB, will go to L4
ASSERT_EQ("0,0,0,0,2", FilesPerLevel());

// Insert 80 -> 130 using AddFile
file_data.clear();
for (int i = 80; i <= 130; i++) {
file_data.emplace_back(Key(i), "L0");
}
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, false, &true_data));

// This file overlap with the memtable, so it will flush it and add
// it self to L0
ASSERT_EQ("2,0,0,0,2", FilesPerLevel());

// Insert 30 -> 50 using AddFile
file_data.clear();
for (int i = 30; i <= 50; i++) {
file_data.emplace_back(Key(i), "L4");
}
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, false, &true_data));

// This file don't overlap with anything in the DB and fit in L4 as well
ASSERT_EQ("2,0,0,0,3", FilesPerLevel());

// Insert 10 -> 40 using AddFile
file_data.clear();
for (int i = 10; i <= 40; i++) {
file_data.emplace_back(Key(i), "L3");
}
ASSERT_OK(GenerateAndAddExternalFile(
options, file_data, -1, true, write_global_seqno,
verify_checksums_before_ingest, false, false, &true_data));

// This file overlap with files in L4, we will ingest it into the last
// non-overlapping and non-empty level, in this case, it's L0.
ASSERT_EQ("3,0,0,0,3", FilesPerLevel());

size_t kcnt = 0;
VerifyDBFromMap(true_data, &kcnt, false);
db_->ReleaseSnapshot(snap);
}

TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
Options options = CurrentOptions();
DestroyAndReopen(options);
Expand Down

0 comments on commit 928aca8

Please sign in to comment.