diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 986653dedf9..dd7d7298585 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -171,12 +171,18 @@ struct CompactionJob::SubcompactionState { const std::vector<FileMetaData*>& grandparents = compaction->grandparents(); // Scan to find earliest grandparent file that contains key. + int grandparent_index_moved = 0; while (grandparent_index < grandparents.size() && icmp->Compare(internal_key, grandparents[grandparent_index]->largest.Encode()) > 0) { if (seen_key) { overlapped_bytes += grandparents[grandparent_index]->fd.GetFileSize(); + if (grandparent_index == 0 || + grandparents[grandparent_index]->fd.GetFileSize() > + compaction->max_output_file_size() / 8) { + grandparent_index_moved++; + } } assert(grandparent_index + 1 >= grandparents.size() || icmp->Compare( @@ -187,8 +193,13 @@ struct CompactionJob::SubcompactionState { seen_key = true; if (overlapped_bytes + curr_file_size > - compaction->max_compaction_bytes()) { - // Too much overlap for current output; start new output + compaction->max_compaction_bytes() || + grandparent_index_moved > 1) { + // Start a new output file if: + // 1. Too much overlap for current output, or + // 2. there is at least one full file in the parent level between the + // previous key and current key, and the file size is more than + // 1/8 of the target file size of the output level. 
overlapped_bytes = 0; return true; } diff --git a/db/compaction_job_test.cc b/db/compaction_job_test.cc index 76bf46cebeb..57dbfea417f 100644 --- a/db/compaction_job_test.cc +++ b/db/compaction_job_test.cc @@ -80,6 +80,8 @@ class CompactionJobTest : public testing::Test { EXPECT_OK(env_->CreateDirIfMissing(dbname_)); db_options_.db_paths.emplace_back(dbname_, std::numeric_limits::max()); + mutable_cf_options_.target_file_size_base = 1024 * 1024; + mutable_cf_options_.max_compaction_bytes = 10 * 1024 * 1024; } std::string GenerateFileName(uint64_t file_number) { @@ -223,9 +225,10 @@ class CompactionJobTest : public testing::Test { void RunCompaction( const std::vector>& input_files, - const stl_wrappers::KVMap& expected_results, + const stl_wrappers::KVMap& expected_last_file_contents, const std::vector& snapshots = {}, - SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber) { + SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber, + int grandparent_level = 0, uint64_t num_output_files = 1) { auto cfd = versions_->GetColumnFamilySet()->GetDefault(); size_t num_input_files = 0; @@ -240,10 +243,18 @@ class CompactionJobTest : public testing::Test { num_input_files += level_files.size(); } + std::vector grandparent; + if (grandparent_level > 0) { + grandparent = + cfd->current()->storage_info()->LevelFiles(grandparent_level); + } + Compaction compaction(cfd->current()->storage_info(), *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), - compaction_input_files, 1, 1024 * 1024, - 10 * 1024 * 1024, 0, kNoCompression, {}, true); + compaction_input_files, 1, + mutable_cf_options_.target_file_size_base, + mutable_cf_options_.max_compaction_bytes, 0, + kNoCompression, grandparent, true); compaction.SetInputVersion(cfd->current()); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); @@ -266,15 +277,15 @@ class CompactionJobTest : public testing::Test { 
ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions())); mutex_.Unlock(); - if (expected_results.size() == 0) { + if (expected_last_file_contents.size() == 0) { ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U); ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files); ASSERT_EQ(compaction_job_stats_.num_output_files, 0U); } else { ASSERT_GE(compaction_job_stats_.elapsed_micros, 0U); ASSERT_EQ(compaction_job_stats_.num_input_files, num_input_files); - ASSERT_EQ(compaction_job_stats_.num_output_files, 1U); - mock_table_factory_->AssertLatestFile(expected_results); + ASSERT_EQ(compaction_job_stats_.num_output_files, num_output_files); + mock_table_factory_->AssertLatestFile(expected_last_file_contents); } } @@ -388,6 +399,102 @@ TEST_F(CompactionJobTest, SimpleNonLastLevel) { RunCompaction({lvl0_files, lvl1_files}, expected_results); } +TEST_F(CompactionJobTest, CutForMaxCompactionBytes) { + NewDB(); + mutable_cf_options_.target_file_size_base = 80; + mutable_cf_options_.max_compaction_bytes = 21; + + auto file1 = mock::MakeMockFile({ + {KeyStr("c", 5U, kTypeValue), "val2"}, + {KeyStr("n", 6U, kTypeValue), "val3"}, + }); + AddMockFile(file1); + + auto file2 = mock::MakeMockFile({{KeyStr("h", 3U, kTypeValue), "val"}, + {KeyStr("j", 4U, kTypeValue), "val"}}); + AddMockFile(file2, 1); + + // Create three L2 files, each size 8. + // max_compaction_bytes 21 means the compaction output in L1 will + // be cut to at least two files. 
+ auto file3 = mock::MakeMockFile({{KeyStr("b", 1U, kTypeValue), "val"}, + {KeyStr("c", 1U, kTypeValue), "val"}, + {KeyStr("c1", 1U, kTypeValue), "val"}, + {KeyStr("c2", 1U, kTypeValue), "val"}, + {KeyStr("c3", 1U, kTypeValue), "val"}, + {KeyStr("c4", 1U, kTypeValue), "val"}, + {KeyStr("d", 1U, kTypeValue), "val"}, + {KeyStr("e", 2U, kTypeValue), "val"}}); + AddMockFile(file3, 2); + + auto file4 = mock::MakeMockFile({{KeyStr("h", 1U, kTypeValue), "val"}, + {KeyStr("i", 1U, kTypeValue), "val"}, + {KeyStr("i1", 1U, kTypeValue), "val"}, + {KeyStr("i2", 1U, kTypeValue), "val"}, + {KeyStr("i3", 1U, kTypeValue), "val"}, + {KeyStr("i4", 1U, kTypeValue), "val"}, + {KeyStr("j", 1U, kTypeValue), "val"}, + {KeyStr("k", 2U, kTypeValue), "val"}}); + AddMockFile(file4, 2); + + auto file5 = mock::MakeMockFile({{KeyStr("l", 1U, kTypeValue), "val"}, + {KeyStr("m", 1U, kTypeValue), "val"}, + {KeyStr("m1", 1U, kTypeValue), "val"}, + {KeyStr("m2", 1U, kTypeValue), "val"}, + {KeyStr("m3", 1U, kTypeValue), "val"}, + {KeyStr("m4", 1U, kTypeValue), "val"}, + {KeyStr("n", 1U, kTypeValue), "val"}, + {KeyStr("o", 2U, kTypeValue), "val"}}); + AddMockFile(file5, 2); + + auto expected_last_file = + mock::MakeMockFile({{KeyStr("n", 6U, kTypeValue), "val3"}}); + + SetLastSequence(6U); + auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0); + auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1); + RunCompaction({lvl0_files, lvl1_files}, expected_last_file, {}, + kMaxSequenceNumber, 2, 2); +} + +TEST_F(CompactionJobTest, CutToSkipGrandparentFile) { + NewDB(); + // Make sure the grandparent level file size (10) qualifies for skipping. 
+ mutable_cf_options_.target_file_size_base = 70; + + auto file1 = mock::MakeMockFile({ + {KeyStr("a", 5U, kTypeValue), "val2"}, + {KeyStr("z", 6U, kTypeValue), "val3"}, + }); + AddMockFile(file1); + + auto file2 = mock::MakeMockFile({{KeyStr("c", 3U, kTypeValue), "val"}, + {KeyStr("x", 4U, kTypeValue), "val"}}); + AddMockFile(file2, 1); + + auto file3 = mock::MakeMockFile({{KeyStr("b", 1U, kTypeValue), "val"}, + {KeyStr("d", 2U, kTypeValue), "val"}}); + AddMockFile(file3, 2); + + auto file4 = mock::MakeMockFile({{KeyStr("h", 1U, kTypeValue), "val"}, + {KeyStr("i", 2U, kTypeValue), "val"}}); + AddMockFile(file4, 2); + + auto file5 = mock::MakeMockFile({{KeyStr("v", 1U, kTypeValue), "val"}, + {KeyStr("y", 2U, kTypeValue), "val"}}); + AddMockFile(file5, 2); + + auto expected_last_file = + mock::MakeMockFile({{KeyStr("x", 4U, kTypeValue), "val"}, + {KeyStr("z", 6U, kTypeValue), "val3"}}); + + SetLastSequence(6U); + auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0); + auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1); + RunCompaction({lvl0_files, lvl1_files}, expected_last_file, {}, + kMaxSequenceNumber, 2, 2); +} + TEST_F(CompactionJobTest, SimpleMerge) { merge_op_ = MergeOperators::CreateStringAppendOperator(); NewDB(); diff --git a/db/db_test.cc b/db/db_test.cc index 0083b37b8f8..6e3837c4bb4 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -4990,8 +4990,9 @@ TEST_F(DBTest, SoftLimit) { dbfull()->TEST_WaitForCompact(); // Now there is one L1 file but doesn't trigger soft_rate_limit - // The L1 file size is around 30KB. - ASSERT_EQ(NumTableFilesAtLevel(1), 1); + // The L1 size is around 30KB. + ASSERT_GE(NumTableFilesAtLevel(1), 1); + ASSERT_LE(NumTableFilesAtLevel(1), 2); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); // Only allow one compactin going through. 
@@ -5022,10 +5023,11 @@ TEST_F(DBTest, SoftLimit) { sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilSleeping(); - // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB + // Now L1 size (around 60KB) exceeds 50KB base by 10KB // Given level multiplier 10, estimated pending compaction is around 100KB // doesn't trigger soft_pending_compaction_bytes_limit - ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_GE(NumTableFilesAtLevel(1), 1); + ASSERT_LE(NumTableFilesAtLevel(1), 4); ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay()); // Create 3 L0 files, making score of L0 to be 3, higher than L0. @@ -5041,11 +5043,12 @@ TEST_F(DBTest, SoftLimit) { sleeping_task_low.WakeUp(); sleeping_task_low.WaitUntilSleeping(); - // Now there is one L1 file (around 90KB) which exceeds 50KB base by 40KB + // Now L1 size is around 90KB which exceeds 50KB base by 40KB // L2 size is 360KB, so the estimated level fanout 4, estimated pending // compaction is around 200KB // triggerring soft_pending_compaction_bytes_limit - ASSERT_EQ(NumTableFilesAtLevel(1), 1); + ASSERT_GE(NumTableFilesAtLevel(1), 1); + ASSERT_LE(NumTableFilesAtLevel(1), 6); ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay()); sleeping_task_low.WakeUp();