Skip to content

Commit

Permalink
Add blob cache option in the column family options (#10155)
Browse files Browse the repository at this point in the history
Summary:
There is currently no caching mechanism for blobs, which is not ideal, especially when the database resides on remote storage (where we cannot rely on the OS page cache). As part of this task, we would like to make it possible for the application to configure a blob cache.
This PR is a part of facebook/rocksdb#10156

Pull Request resolved: facebook/rocksdb#10155

Reviewed By: ltamasi

Differential Revision: D37150819

Pulled By: gangliao

fbshipit-source-id: b807c7916ea5d411588128f8e22a49f171388fe2
  • Loading branch information
gangliao authored and facebook-github-bot committed Jun 14, 2022
1 parent 1d2950b commit cba398d
Show file tree
Hide file tree
Showing 13 changed files with 82 additions and 8 deletions.
5 changes: 5 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
return opt->rep.blob_file_starting_level;
}

// Sets the blob cache option on `opt`.
//
// opt:        options object to modify.
// blob_cache: cache handle whose underlying cache pointer is copied into
//             the options. Passing NULL clears the option, which leaves
//             blob caching disabled (a null blob_cache option means
//             "do not use a blob cache").
void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
                                    rocksdb_cache_t* blob_cache) {
  // Guard the handle before touching it: dereferencing a NULL handle would
  // crash, whereas a null option value is the documented "disabled" state.
  if (blob_cache == nullptr) {
    opt->rep.blob_cache = nullptr;
    return;
  }
  opt->rep.blob_cache = blob_cache->rep;
}

// Stores `n` as the num_levels setting of the options wrapped by `opt`.
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
  auto& wrapped_options = opt->rep;
  wrapped_options.num_levels = n;
}
Expand Down
1 change: 1 addition & 0 deletions db/db_options_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ TEST_F(DBOptionsTest, SetMutableTableOptions) {

ColumnFamilyHandle* cfh = dbfull()->DefaultColumnFamily();
Options c_opts = dbfull()->GetOptions(cfh);

const auto* c_bbto =
c_opts.table_factory->GetOptions<BlockBasedTableOptions>();
ASSERT_NE(c_bbto, nullptr);
Expand Down
10 changes: 9 additions & 1 deletion include/rocksdb/advanced_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/universal_compaction.h"
Expand Down Expand Up @@ -227,7 +228,7 @@ enum class Temperature : uint8_t {
};

// The control option of how the cache tiers will be used. Currently rocksdb
// support block cahe (volatile tier), secondary cache (non-volatile tier).
// support block cache (volatile tier), secondary cache (non-volatile tier).
// In the future, we may add more caching layers.
enum class CacheTier : uint8_t {
kVolatileTier = 0,
Expand Down Expand Up @@ -953,6 +954,13 @@ struct AdvancedColumnFamilyOptions {
// Dynamically changeable through the SetOptions() API
int blob_file_starting_level = 0;

// This feature is WORK IN PROGRESS
// If non-NULL, use the specified cache for blobs.
// If NULL, rocksdb will not use a blob cache.
//
// Default: nullptr (disabled)
std::shared_ptr<Cache> blob_cache = nullptr;

// Create ColumnFamilyOptions with default values for all fields
AdvancedColumnFamilyOptions();
// Create ColumnFamilyOptions from Options
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,9 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_file_starting_level(
extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
rocksdb_options_t* opt);

/* Sets the blob cache option on opt from the cache wrapped by blob_cache.
   The blob cache feature is marked as work in progress; when no blob cache
   is configured, blobs are not cached. */
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache(
rocksdb_options_t* opt, rocksdb_cache_t* blob_cache);

/* returns a pointer to a malloc()-ed, null terminated string */
extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
rocksdb_options_t* opt);
Expand Down
2 changes: 1 addition & 1 deletion include/rocksdb/file_system.h
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ struct FSReadRequest {
// returns fewer bytes if end of file is hit (or `status` is not OK).
size_t len;

// A buffer that MultiRead() can optionally place data in. It can
// A buffer that MultiRead() can optionally place data in. It can
// ignore this and allocate its own buffer.
// The lifecycle of scratch will be until IO is completed.
//
Expand Down
1 change: 0 additions & 1 deletion include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,6 @@ struct Options : public DBOptions, public ColumnFamilyOptions {
Options* DisableExtraChecks();
};

//
// An application can issue a read request (via Get/Iterators) and specify
// if that read should process data that ALREADY resides on a specified cache
// level. For example, if an application specifies kBlockCacheTier then the
Expand Down
13 changes: 12 additions & 1 deletion options/cf_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,16 @@ static std::unordered_map<std::string, OptionTypeInfo>
OptionTypeInfo::AsCustomSharedPtr<SstPartitionerFactory>(
offsetof(struct ImmutableCFOptions, sst_partitioner_factory),
OptionVerificationType::kByName, OptionTypeFlags::kAllowNull)},
{"blob_cache",
{offsetof(struct ImmutableCFOptions, blob_cache), OptionType::kUnknown,
OptionVerificationType::kNormal,
(OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
// Parses the input value as a Cache
[](const ConfigOptions& opts, const std::string&,
const std::string& value, void* addr) {
auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
return Cache::CreateFromString(opts, value, cache);
}}},
};

const std::string OptionsHelper::kCFOptionsName = "ColumnFamilyOptions";
Expand Down Expand Up @@ -870,7 +880,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options)
cf_options.memtable_insert_with_hint_prefix_extractor),
cf_paths(cf_options.cf_paths),
compaction_thread_limiter(cf_options.compaction_thread_limiter),
sst_partitioner_factory(cf_options.sst_partitioner_factory) {}
sst_partitioner_factory(cf_options.sst_partitioner_factory),
blob_cache(cf_options.blob_cache) {}

ImmutableOptions::ImmutableOptions() : ImmutableOptions(Options()) {}

Expand Down
2 changes: 2 additions & 0 deletions options/cf_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ struct ImmutableCFOptions {
std::shared_ptr<ConcurrentTaskLimiter> compaction_thread_limiter;

std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory;

std::shared_ptr<Cache> blob_cache;
};

struct ImmutableOptions : public ImmutableDBOptions, public ImmutableCFOptions {
Expand Down
9 changes: 8 additions & 1 deletion options/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
blob_garbage_collection_force_threshold(
options.blob_garbage_collection_force_threshold),
blob_compaction_readahead_size(options.blob_compaction_readahead_size),
blob_file_starting_level(options.blob_file_starting_level) {
blob_file_starting_level(options.blob_file_starting_level),
blob_cache(options.blob_cache) {
assert(memtable_factory.get() != nullptr);
if (max_bytes_for_level_multiplier_additional.size() <
static_cast<unsigned int>(num_levels)) {
Expand Down Expand Up @@ -417,6 +418,12 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
blob_compaction_readahead_size);
ROCKS_LOG_HEADER(log, " Options.blob_file_starting_level: %d",
blob_file_starting_level);
if (blob_cache) {
ROCKS_LOG_HEADER(log, " Options.blob_cache: %s",
blob_cache->Name());
ROCKS_LOG_HEADER(log, " blob_cache options: %s",
blob_cache->GetPrintableOptions().c_str());
}
} // ColumnFamilyOptions::Dump

void Options::Dump(Logger* log) const {
Expand Down
1 change: 1 addition & 0 deletions options/options_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions,
cf_opts->cf_paths = ioptions.cf_paths;
cf_opts->compaction_thread_limiter = ioptions.compaction_thread_limiter;
cf_opts->sst_partitioner_factory = ioptions.sst_partitioner_factory;
cf_opts->blob_cache = ioptions.blob_cache;

// TODO(yhchiang): find some way to handle the following derived options
// * max_file_size
Expand Down
9 changes: 7 additions & 2 deletions options/options_settable_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
// test is not updated accordingly.
// After adding an option, we need to make sure it is settable by
// GetColumnFamilyOptionsFromString() and add the option to the input
// string passed to GetColumnFamilyOptionsFromString()in this test.
// string passed to GetColumnFamilyOptionsFromString() in this test.
// If it is a complicated type, you also need to add the field to
// kColumnFamilyOptionsExcluded, and maybe add customized verification
// for it.
Expand All @@ -400,6 +400,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
{offsetof(struct ColumnFamilyOptions,
table_properties_collector_factories),
sizeof(ColumnFamilyOptions::TablePropertiesCollectorFactories)},
{offsetof(struct ColumnFamilyOptions, blob_cache),
sizeof(std::shared_ptr<Cache>)},
{offsetof(struct ColumnFamilyOptions, comparator), sizeof(Comparator*)},
{offsetof(struct ColumnFamilyOptions, merge_operator),
sizeof(std::shared_ptr<MergeOperator>)},
Expand Down Expand Up @@ -523,9 +525,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
"blob_file_starting_level=1;"
"bottommost_temperature=kWarm;"
"compaction_options_fifo={max_table_files_size=3;allow_"
"compaction=false;age_for_warm=1;};",
"compaction=false;age_for_warm=1;};"
"blob_cache=1M;",
new_options));

ASSERT_NE(new_options->blob_cache.get(), nullptr);

ASSERT_EQ(unset_bytes_base,
NumUnsetBytes(new_options_ptr, sizeof(ColumnFamilyOptions),
kColumnFamilyOptionsExcluded));
Expand Down
32 changes: 32 additions & 0 deletions options/options_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,22 @@ TEST_F(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_EQ(std::string(new_cf_opt.memtable_factory->Name()), "SkipListFactory");
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));

// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
config_options, base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}

TEST_F(OptionsTest, CompressionOptionsFromString) {
Expand Down Expand Up @@ -2767,6 +2783,22 @@ TEST_F(OptionsOldApiTest, GetColumnFamilyOptionsFromStringTest) {
&new_cf_opt));
ASSERT_TRUE(new_cf_opt.memtable_factory != nullptr);
ASSERT_TRUE(new_cf_opt.memtable_factory->IsInstanceOf("SkipListFactory"));

// blob cache
ASSERT_OK(GetColumnFamilyOptionsFromString(
base_cf_opt,
"blob_cache={capacity=1M;num_shard_bits=4;"
"strict_capacity_limit=true;high_pri_pool_ratio=0.5;};",
&new_cf_opt));
ASSERT_NE(new_cf_opt.blob_cache, nullptr);
ASSERT_EQ(new_cf_opt.blob_cache->GetCapacity(), 1024UL * 1024UL);
ASSERT_EQ(static_cast<ShardedCache*>(new_cf_opt.blob_cache.get())
->GetNumShardBits(),
4);
ASSERT_EQ(new_cf_opt.blob_cache->HasStrictCapacityLimit(), true);
ASSERT_EQ(static_cast<LRUCache*>(new_cf_opt.blob_cache.get())
->GetHighPriPoolRatio(),
0.5);
}

TEST_F(OptionsTest, SliceTransformCreateFromString) {
Expand Down
2 changes: 1 addition & 1 deletion table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ void BlockBasedTable::SetupBaseCacheKey(const TableProperties* properties,
// assert(!db_id.empty());

// Minimum block size is 5 bytes; therefore we can trim off two lower bits
// from offets. See GetCacheKey.
// from offsets. See GetCacheKey.
*out_base_cache_key = OffsetableCacheKey(db_id, db_session_id, file_num,
/*max_offset*/ file_size >> 2);
}
Expand Down

0 comments on commit cba398d

Please sign in to comment.