Skip to content

Commit

Permalink
New backup meta schema, with file temperatures (#9660)
Browse files Browse the repository at this point in the history
Summary:
The primary goal of this change is to add support for backing up and
restoring (applying on restore) file temperature metadata, without
committing to either the DB manifest or the FS reported "current"
temperatures being exclusive "source of truth".

To achieve this goal, we need to add temperature information to backup
metadata, which requires updated backup meta schema. Fortunately I
prepared for this in facebook/rocksdb#8069, which began forward compatibility in version
6.19.0 for this kind of schema update. (Previously, backup meta schema
was not extensible! Making this schema update public will allow some
other "nice to have" features like taking backups with hard links, and
avoiding crc32c checksum computation when another checksum is already
available.) While schema version 2 is newly public, the default schema
version is still 1. Until we change the default, users will need to set
schema_version to 2 to enable features like temperature data backup+restore. New
metadata like temperature information will be ignored with a warning
in versions before this change and since 6.19.0. The metadata is
considered ignorable because a functioning DB can be restored without
it.

Some detail:
* Some renaming because "future schema" is now just public schema 2.
* Initialize some atomics in TestFs (linter reported)
* Add temperature hint support to SstFileDumper (used by BackupEngine)

Pull Request resolved: facebook/rocksdb#9660

Test Plan:
The related unit test was substantially updated for the new functionality,
including some shared testing support for tracking temperatures in a FS.

Some other tests and testing hooks into production code were also updated to
make the backup meta schema change public.

Reviewed By: ajkr

Differential Revision: D34686968

Pulled By: pdillinger

fbshipit-source-id: 3ac1fa3e67ee97ca8a5103d79cc87d872c1d862a
  • Loading branch information
pdillinger authored and facebook-github-bot committed Mar 18, 2022
1 parent 3bdbf67 commit cff0d1e
Show file tree
Hide file tree
Showing 13 changed files with 427 additions and 151 deletions.
26 changes: 14 additions & 12 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6949,25 +6949,27 @@ TEST_F(DBTest2, CheckpointFileTemperature) {
temperatures.emplace(info.file_number, info.temperature);
}

test_fs->ClearRequestedFileTemperatures();
test_fs->PopRequestedSstFileTemperatures();
Checkpoint* checkpoint;
ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
ASSERT_OK(
checkpoint->CreateCheckpoint(dbname_ + kFilePathSeparator + "tempcp"));

// checking src file src_temperature hints: 2 sst files: 1 sst is kWarm,
// another is kUnknown
auto file_temperatures = test_fs->RequestedSstFileTemperatures();
ASSERT_EQ(file_temperatures.size(), 2);
bool has_only_one_warm_sst = false;
for (const auto& file_temperature : file_temperatures) {
ASSERT_EQ(temperatures.at(file_temperature.first), file_temperature.second);
if (file_temperature.second == Temperature::kWarm) {
ASSERT_FALSE(has_only_one_warm_sst);
has_only_one_warm_sst = true;
}
}
ASSERT_TRUE(has_only_one_warm_sst);
std::vector<std::pair<uint64_t, Temperature>> requested_temps;
test_fs->PopRequestedSstFileTemperatures(&requested_temps);
// Two requests
ASSERT_EQ(requested_temps.size(), 2);
std::set<uint64_t> distinct_requests;
for (const auto& requested_temp : requested_temps) {
// Matching manifest temperatures
ASSERT_EQ(temperatures.at(requested_temp.first), requested_temp.second);
distinct_requests.insert(requested_temp.first);
}
// Each request to distinct file
ASSERT_EQ(distinct_requests.size(), requested_temps.size());

delete checkpoint;
Close();
}
Expand Down
101 changes: 88 additions & 13 deletions db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <algorithm>
#include <cinttypes>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <thread>
Expand All @@ -23,12 +24,15 @@

#include "db/db_impl/db_impl.h"
#include "file/filename.h"
#include "rocksdb/advanced_options.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/io_status.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/sst_file_writer.h"
Expand Down Expand Up @@ -698,29 +702,79 @@ class FileTemperatureTestFS : public FileSystemWrapper {
IOStatus NewSequentialFile(const std::string& fname, const FileOptions& opts,
std::unique_ptr<FSSequentialFile>* result,
IODebugContext* dbg) override {
auto filename = GetFileName(fname);
IOStatus s = target()->NewSequentialFile(fname, opts, result, dbg);
uint64_t number;
FileType type;
auto r = ParseFileName(filename, &number, &type);
assert(r);
if (type == kTableFile) {
auto emplaced =
requested_sst_file_temperatures_.emplace(number, opts.temperature);
assert(emplaced.second); // assume no duplication
if (ParseFileName(GetFileName(fname), &number, &type) &&
type == kTableFile) {
MutexLock lock(&mu_);
requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
if (s.ok()) {
*result = WrapWithTemperature<FSSequentialFileOwnerWrapper>(
number, std::move(*result));
}
}
return s;
}

IOStatus NewRandomAccessFile(const std::string& fname,
const FileOptions& opts,
std::unique_ptr<FSRandomAccessFile>* result,
IODebugContext* dbg) override {
IOStatus s = target()->NewRandomAccessFile(fname, opts, result, dbg);
uint64_t number;
FileType type;
if (ParseFileName(GetFileName(fname), &number, &type) &&
type == kTableFile) {
MutexLock lock(&mu_);
requested_sst_file_temperatures_.emplace_back(number, opts.temperature);
if (s.ok()) {
*result = WrapWithTemperature<FSRandomAccessFileOwnerWrapper>(
number, std::move(*result));
}
}
return target()->NewSequentialFile(fname, opts, result, dbg);
return s;
}

const std::map<uint64_t, Temperature>& RequestedSstFileTemperatures() {
return requested_sst_file_temperatures_;
// Hands the accumulated (file number, requested temperature) records over to
// the caller and resets the record list, all under mu_.
//
// out: if non-null, receives the records in the order they were appended;
//      if null, the records are simply discarded.
void PopRequestedSstFileTemperatures(
    std::vector<std::pair<uint64_t, Temperature>>* out = nullptr) {
  MutexLock lock(&mu_);
  if (out) {
    *out = std::move(requested_sst_file_temperatures_);
  }
  // A moved-from vector is only guaranteed to be in a valid but unspecified
  // state, and an assert() would not enforce emptiness in release builds.
  // Clearing explicitly guarantees the next Pop never sees stale records.
  requested_sst_file_temperatures_.clear();
}

void ClearRequestedFileTemperatures() {
requested_sst_file_temperatures_.clear();
// Records opts.temperature as the current temperature of the file when the
// base name of `fname` parses as a table file, then forwards the call
// unchanged to target().
IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
                         std::unique_ptr<FSWritableFile>* result,
                         IODebugContext* dbg) override {
  uint64_t file_number;
  FileType file_type;
  const bool parsed =
      ParseFileName(GetFileName(fname), &file_number, &file_type);
  // file_type is only meaningful when parsing succeeded, so test it second.
  if (parsed && file_type == kTableFile) {
    MutexLock guard(&mu_);
    current_sst_file_temperatures_[file_number] = opts.temperature;
  }
  return target()->NewWritableFile(fname, opts, result, dbg);
}

// Copies the whole file-number -> temperature map into *out while holding
// mu_, so the caller gets a consistent snapshot.
void CopyCurrentSstFileTemperatures(std::map<uint64_t, Temperature>* out) {
  MutexLock guard(&mu_);
  std::map<uint64_t, Temperature>& snapshot = *out;
  snapshot = current_sst_file_temperatures_;
}

void OverrideSstFileTemperature(uint64_t number, Temperature temp) {
MutexLock lock(&mu_);
current_sst_file_temperatures_[number] = temp;
}

protected:
std::map<uint64_t, Temperature> requested_sst_file_temperatures_;
port::Mutex mu_;
std::vector<std::pair<uint64_t, Temperature>>
requested_sst_file_temperatures_;
std::map<uint64_t, Temperature> current_sst_file_temperatures_;

std::string GetFileName(const std::string& fname) {
auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
Expand All @@ -729,6 +783,27 @@ class FileTemperatureTestFS : public FileSystemWrapper {
filename = filename.substr(filename.find_last_of('/') + 1);
return filename;
}

// Takes ownership of file `t` and returns it wrapped in a FileOwnerWrapperT
// subclass whose GetTemperature() reports whatever this FS currently has
// recorded for file `number`. The wrapper keeps a raw pointer back to this
// FS and looks the temperature up on every call.
template <class FileOwnerWrapperT, /*inferred*/ class FileT>
std::unique_ptr<FileT> WrapWithTemperature(uint64_t number,
                                           std::unique_ptr<FileT>&& t) {
  class TemperatureReportingFile : public FileOwnerWrapperT {
   public:
    TemperatureReportingFile(FileTemperatureTestFS* owner,
                             uint64_t file_number,
                             std::unique_ptr<FileT>&& wrapped)
        : FileOwnerWrapperT(std::move(wrapped)),
          owner_(owner),
          file_number_(file_number) {}

    Temperature GetTemperature() const override {
      MutexLock guard(&owner_->mu_);
      // operator[] default-inserts an entry when the number was never
      // recorded, so the lookup itself may grow the map.
      auto& recorded = owner_->current_sst_file_temperatures_;
      return recorded[file_number_];
    }

   private:
    FileTemperatureTestFS* owner_;
    uint64_t file_number_;
  };
  std::unique_ptr<FileT> wrapper =
      std::make_unique<TemperatureReportingFile>(this, number, std::move(t));
  return wrapper;
}
};

class OnFileDeletionListener : public EventListener {
Expand Down
11 changes: 8 additions & 3 deletions db_stress_tool/db_stress_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1451,18 +1451,23 @@ Status StressTest::TestBackupRestore(
}
}
}
if (thread->rand.OneIn(2)) {
backup_opts.schema_version = 1;
} else {
backup_opts.schema_version = 2;
}
BackupEngine* backup_engine = nullptr;
std::string from = "a backup/restore operation";
Status s = BackupEngine::Open(db_stress_env, backup_opts, &backup_engine);
if (!s.ok()) {
from = "BackupEngine::Open";
}
if (s.ok()) {
if (thread->rand.OneIn(2)) {
TEST_FutureSchemaVersion2Options test_opts;
if (backup_opts.schema_version >= 2 && thread->rand.OneIn(2)) {
TEST_BackupMetaSchemaOptions test_opts;
test_opts.crc32c_checksums = thread->rand.OneIn(2) == 0;
test_opts.file_sizes = thread->rand.OneIn(2) == 0;
TEST_EnableWriteFutureSchemaVersion2(backup_engine, test_opts);
TEST_SetBackupMetaSchemaOptions(backup_engine, test_opts);
}
CreateBackupOptions create_opts;
if (FLAGS_disable_wal) {
Expand Down
16 changes: 16 additions & 0 deletions include/rocksdb/utilities/backup_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,22 @@ struct BackupEngineOptions {
// and share_table_files are true.
ShareFilesNaming share_files_with_checksum_naming;

// Major schema version to use when writing backup meta files
// 1 (default) - compatible with very old versions of RocksDB.
// 2 - can be read by RocksDB versions >= 6.19.0. Minimum schema version for
// * (Experimental) saving and restoring file temperature metadata
int schema_version = 1;

// (Experimental - subject to change or removal) When taking a backup and
// saving file temperature info (minimum schema_version is 2), there are
// two potential sources of truth for the placement of files into temperature
// tiers: (a) the current file temperature reported by the FileSystem or
// (b) the expected file temperature recorded in DB manifest. When this
// option is false (default), (b) overrides (a) if both are not UNKNOWN.
// When true, (a) overrides (b) if both are not UNKNOWN. Regardless of this
// setting, a known temperature overrides UNKNOWN.
bool current_temperatures_override_manifest = false;

void Dump(Logger* logger) const;

explicit BackupEngineOptions(
Expand Down
6 changes: 6 additions & 0 deletions options/options_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ std::map<CompactionStopStyle, std::string>
{kCompactionStopStyleSimilarSize, "kCompactionStopStyleSimilarSize"},
{kCompactionStopStyleTotalSize, "kCompactionStopStyleTotalSize"}};

// Maps each Temperature enumerator listed here (kUnknown, kHot, kWarm, kCold)
// to a string spelling of its enumerator name.
std::map<Temperature, std::string> OptionsHelper::temperature_to_string = {
{Temperature::kUnknown, "kUnknown"},
{Temperature::kHot, "kHot"},
{Temperature::kWarm, "kWarm"},
{Temperature::kCold, "kCold"}};

std::unordered_map<std::string, ChecksumType>
OptionsHelper::checksum_type_string_map = {{"kNoChecksum", kNoChecksum},
{"kCRC32c", kCRC32c},
Expand Down
3 changes: 3 additions & 0 deletions options/options_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <string>
#include <vector>

#include "rocksdb/advanced_options.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
Expand Down Expand Up @@ -77,6 +78,7 @@ struct OptionsHelper {
static std::map<CompactionPri, std::string> compaction_pri_to_string;
static std::map<CompactionStopStyle, std::string>
compaction_stop_style_to_string;
static std::map<Temperature, std::string> temperature_to_string;
static std::unordered_map<std::string, ChecksumType> checksum_type_string_map;
static std::unordered_map<std::string, CompressionType>
compression_type_string_map;
Expand All @@ -98,6 +100,7 @@ static auto& compaction_style_to_string =
static auto& compaction_pri_to_string = OptionsHelper::compaction_pri_to_string;
static auto& compaction_stop_style_to_string =
OptionsHelper::compaction_stop_style_to_string;
static auto& temperature_to_string = OptionsHelper::temperature_to_string;
static auto& checksum_type_string_map = OptionsHelper::checksum_type_string_map;
#ifndef ROCKSDB_LITE
static auto& compaction_stop_style_string_map =
Expand Down
16 changes: 9 additions & 7 deletions table/sst_file_dumper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,13 @@ namespace ROCKSDB_NAMESPACE {

SstFileDumper::SstFileDumper(const Options& options,
const std::string& file_path,
size_t readahead_size, bool verify_checksum,
bool output_hex, bool decode_blob_index,
const EnvOptions& soptions, bool silent)
Temperature file_temp, size_t readahead_size,
bool verify_checksum, bool output_hex,
bool decode_blob_index, const EnvOptions& soptions,
bool silent)
: file_name_(file_path),
read_num_(0),
file_temp_(file_temp),
output_hex_(output_hex),
decode_blob_index_(decode_blob_index),
soptions_(soptions),
Expand Down Expand Up @@ -82,8 +84,9 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
const auto& fs = options_.env->GetFileSystem();
std::unique_ptr<FSRandomAccessFile> file;
uint64_t file_size = 0;
Status s = fs->NewRandomAccessFile(file_path, FileOptions(soptions_), &file,
nullptr);
FileOptions fopts = soptions_;
fopts.temperature = file_temp_;
Status s = fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
if (s.ok()) {
s = fs->GetFileSize(file_path, IOOptions(), &file_size, nullptr);
}
Expand Down Expand Up @@ -122,8 +125,7 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
magic_number == kLegacyPlainTableMagicNumber) {
soptions_.use_mmap_reads = true;

fs->NewRandomAccessFile(file_path, FileOptions(soptions_), &file,
nullptr);
fs->NewRandomAccessFile(file_path, fopts, &file, nullptr);
file_.reset(new RandomAccessFileReader(std::move(file), file_path));
}

Expand Down
8 changes: 6 additions & 2 deletions table/sst_file_dumper.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,20 @@

#include <memory>
#include <string>

#include "db/dbformat.h"
#include "file/writable_file_writer.h"
#include "options/cf_options.h"
#include "rocksdb/advanced_options.h"

namespace ROCKSDB_NAMESPACE {

class SstFileDumper {
public:
explicit SstFileDumper(const Options& options, const std::string& file_name,
size_t readahead_size, bool verify_checksum,
bool output_hex, bool decode_blob_index,
Temperature file_temp, size_t readahead_size,
bool verify_checksum, bool output_hex,
bool decode_blob_index,
const EnvOptions& soptions = EnvOptions(),
bool silent = false);

Expand Down Expand Up @@ -71,6 +74,7 @@ class SstFileDumper {

std::string file_name_;
uint64_t read_num_;
Temperature file_temp_;
bool output_hex_;
bool decode_blob_index_;
EnvOptions soptions_;
Expand Down
3 changes: 2 additions & 1 deletion tools/ldb_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3471,7 +3471,8 @@ void DumpSstFile(Options options, std::string filename, bool output_hex,
// no verification
// TODO: add support for decoding blob indexes in ldb as well
ROCKSDB_NAMESPACE::SstFileDumper dumper(
options, filename, 2 * 1024 * 1024 /* readahead_size */,
options, filename, Temperature::kUnknown,
2 * 1024 * 1024 /* readahead_size */,
/* verify_checksum */ false, output_hex,
/* decode_blob_index */ false);
Status st = dumper.ReadSequential(true, std::numeric_limits<uint64_t>::max(),
Expand Down
6 changes: 3 additions & 3 deletions tools/sst_dump_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
filename = std::string(dir_or_file) + "/" + filename;
}

ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, readahead_size,
verify_checksum, output_hex,
decode_blob_index);
ROCKSDB_NAMESPACE::SstFileDumper dumper(
options, filename, Temperature::kUnknown, readahead_size,
verify_checksum, output_hex, decode_blob_index);
// Not a valid SST
if (!dumper.getStatus().ok()) {
fprintf(stderr, "%s: %s\n", filename.c_str(),
Expand Down
Loading

0 comments on commit cff0d1e

Please sign in to comment.