Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4404,6 +4404,16 @@ unsigned char rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(
return opt->rep.skip_checking_sst_file_sizes_on_db_open;
}

// C API setter: stores `val` into the
// skip_directory_scan_on_readonly_db_open field of the options object wrapped
// by `opt` (opt->rep).
void rocksdb_options_set_skip_directory_scan_on_readonly_db_open(
    rocksdb_options_t* opt, unsigned char val) {
  auto& wrapped_options = opt->rep;
  wrapped_options.skip_directory_scan_on_readonly_db_open = val;
}

// C API getter: returns the current value of the
// skip_directory_scan_on_readonly_db_open field of the options object wrapped
// by `opt` (opt->rep).
unsigned char rocksdb_options_get_skip_directory_scan_on_readonly_db_open(
    rocksdb_options_t* opt) {
  const auto& wrapped_options = opt->rep;
  return wrapped_options.skip_directory_scan_on_readonly_db_open;
}

/* Blob Options Settings */
void rocksdb_options_set_enable_blob_files(rocksdb_options_t* opt,
unsigned char val) {
Expand Down
7 changes: 5 additions & 2 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,9 @@ Status DBImpl::Recover(
// attention to it in case we are recovering a database
// produced by an older version of rocksdb.
auto wal_dir = immutable_db_options_.GetWalDir();
if (!immutable_db_options_.best_efforts_recovery) {
if (!immutable_db_options_.best_efforts_recovery &&
!(read_only &&
immutable_db_options_.skip_directory_scan_on_readonly_db_open)) {
IOOptions io_opts;
io_opts.do_not_recurse = true;
s = immutable_db_options_.fs->GetChildren(
Expand Down Expand Up @@ -824,7 +826,8 @@ Status DBImpl::Recover(
}
}

if (read_only) {
if (read_only &&
!immutable_db_options_.skip_directory_scan_on_readonly_db_open) {
// If we are opening as read-only, we need to update options_file_number_
// to reflect the most recent OPTIONS file. It does not matter for regular
// read-write db instance because options_file_number_ will later be
Expand Down
79 changes: 79 additions & 0 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "db/db_test_util.h"
#include "db/read_callback.h"
#include "db/version_edit.h"
#include "env/composite_env_wrapper.h"
#include "env/fs_readonly.h"
#include "options/options_helper.h"
#include "port/port.h"
Expand Down Expand Up @@ -94,6 +95,84 @@ TEST_F(DBTest2, OpenForReadOnlyWithColumnFamilies) {
ASSERT_NOK(env_->FileExists(dbname));
}

// Verifies that DB::OpenForReadOnly() fails on a file system whose
// GetChildren() is rejected, and succeeds (with flushed data readable) once
// skip_directory_scan_on_readonly_db_open is enabled.
TEST_F(DBTest2, SkipDirectoryScanOnReadOnlyOpen) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_OK(Put("key1", "value1"));
  ASSERT_OK(Put("key2", "value2"));
  ASSERT_OK(Flush());
  Close();

  // Layer a file system on top of the test env that rejects GetChildren().
  auto base_fs = env_->GetFileSystem();
  auto no_readdir_fs = std::make_shared<test::NoReaddirFS>(base_fs);
  std::unique_ptr<Env> custom_env(new CompositeEnvWrapper(env_, no_readdir_fs));

  Options ro_options = CurrentOptions();
  ro_options.env = custom_env.get();
  DB* db_ptr = nullptr;
  // With the option left at its default the open is expected to fail on this
  // file system.
  ASSERT_NOK(DB::OpenForReadOnly(ro_options, dbname_, &db_ptr));

  // getdents64() should be skipped.
  ro_options.skip_directory_scan_on_readonly_db_open = true;
  ASSERT_OK(DB::OpenForReadOnly(ro_options, dbname_, &db_ptr));
  // Take ownership immediately: a fatal ASSERT_* below returns from the test
  // body, and a trailing `delete` would then never run. Declared after
  // custom_env so the DB is destroyed before the Env it points at.
  std::unique_ptr<DB> db_guard(db_ptr);

  std::string value;
  ASSERT_OK(db_guard->Get(ReadOptions(), "key1", &value));
  ASSERT_EQ("value1", value);
  ASSERT_OK(db_guard->Get(ReadOptions(), "key2", &value));
  ASSERT_EQ("value2", value);
}

// Verifies the visibility trade-off of skip_directory_scan_on_readonly_db_open:
// data that only lives in the WAL is not visible when the option is set, and
// becomes visible again when the option is cleared.
TEST_F(DBTest2, SkipDirectoryScanUnflushedDataNotVisible) {
  Options options = CurrentOptions();

  options.create_if_missing = true;
  options.write_buffer_size = 64 << 20;
  options.max_write_buffer_number = 10;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Write a key, then flush. Write another key, don't flush to make
  // sure it's resident in the WAL.
  ASSERT_OK(Put("flushed_key", "flushed_value"));
  ASSERT_OK(Flush());

  WriteOptions wo;
  wo.disableWAL = false;  // Explicit: this write must go through the WAL.
  ASSERT_OK(db_->Put(wo, "unflushed_key", "unflushed_value"));

  Close();

  // Open read-only with skip_directory_scan_on_readonly_db_open = true
  // The unflushed data should not be visible (WAL not replayed).
  Options ro_options = CurrentOptions();
  ro_options.skip_directory_scan_on_readonly_db_open = true;
  DB* db_ptr = nullptr;
  ASSERT_OK(DB::OpenForReadOnly(ro_options, dbname_, &db_ptr));
  // Own the handle so that a fatal ASSERT_* (which returns from the test body)
  // cannot leak it.
  std::unique_ptr<DB> db_guard(db_ptr);

  std::string value;
  ASSERT_OK(db_guard->Get(ReadOptions(), "flushed_key", &value));
  ASSERT_EQ("flushed_value", value);
  ASSERT_TRUE(
      db_guard->Get(ReadOptions(), "unflushed_key", &value).IsNotFound());

  // Close the first read-only instance before reopening.
  db_guard.reset();
  db_ptr = nullptr;

  // Set skip_directory_scan_on_readonly_db_open to false, WAL should be found
  // and replayed.
  ro_options.skip_directory_scan_on_readonly_db_open = false;
  ASSERT_OK(DB::OpenForReadOnly(ro_options, dbname_, &db_ptr));
  db_guard.reset(db_ptr);

  // Start from an empty string so stale contents from the first round of
  // reads cannot satisfy the comparisons below.
  value.clear();
  ASSERT_OK(db_guard->Get(ReadOptions(), "flushed_key", &value));
  ASSERT_EQ("flushed_value", value);
  ASSERT_OK(db_guard->Get(ReadOptions(), "unflushed_key", &value));
  ASSERT_EQ("unflushed_value", value);
}

class PartitionedIndexTestListener : public EventListener {
public:
void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& info) override {
Expand Down
6 changes: 6 additions & 0 deletions include/rocksdb/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,12 @@ rocksdb_options_set_skip_checking_sst_file_sizes_on_db_open(
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_skip_checking_sst_file_sizes_on_db_open(
rocksdb_options_t* opt);
/* Sets the skip_directory_scan_on_readonly_db_open option on `opt` to `val`. */
extern ROCKSDB_LIBRARY_API void
rocksdb_options_set_skip_directory_scan_on_readonly_db_open(
rocksdb_options_t* opt, unsigned char val);
/* Returns the current skip_directory_scan_on_readonly_db_open value of `opt`. */
extern ROCKSDB_LIBRARY_API unsigned char
rocksdb_options_get_skip_directory_scan_on_readonly_db_open(
rocksdb_options_t* opt);

/* Blob Options Settings */
extern ROCKSDB_LIBRARY_API void rocksdb_options_set_enable_blob_files(
Expand Down
13 changes: 13 additions & 0 deletions include/rocksdb/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -1373,6 +1373,19 @@ struct DBOptions {
// Default: false
bool skip_checking_sst_file_sizes_on_db_open = false;

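// If true, skip directory listing operations (readdir/getdents64) during
// read-only database open. Use this for file systems that do not support
// directory listing.
//
// When enabled, the WAL directory is not scanned, so WAL files are neither
// discovered nor replayed: writes that exist only in the WAL (i.e. that were
// not flushed to SST files before the read-only open) are NOT visible through
// the read-only instance, even if the writer closed the database cleanly.
// Flush before closing the writer if all data must be visible. The lookup of
// the most recent OPTIONS file during read-only open is skipped as well.
//
// Only affects DB::OpenForReadOnly(); ignored for read-write opens.
//
// Default: false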
bool skip_directory_scan_on_readonly_db_open = false;

// Recovery mode to control the consistency while replaying WAL
// Default: kPointInTimeRecovery
WALRecoveryMode wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
Expand Down
24 changes: 24 additions & 0 deletions java/rocksjni/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1983,6 +1983,30 @@ jboolean Java_org_rocksdb_Options_skipCheckingSstFileSizesOnDbOpen(
return static_cast<jboolean>(opt->skip_checking_sst_file_sizes_on_db_open);
}

/*
* Class: org_rocksdb_Options
* Method: setSkipDirectoryScanOnReadOnlyDbOpen
* Signature: (JZ)V
*/
void Java_org_rocksdb_Options_setSkipDirectoryScanOnReadOnlyDbOpen(
    JNIEnv*, jclass, jlong jhandle,
    jboolean jskip_directory_scan_on_read_only_db_open) {
  // Convert the Java boolean first, then write it through the native handle.
  const bool skip_scan =
      static_cast<bool>(jskip_directory_scan_on_read_only_db_open);
  reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle)
      ->skip_directory_scan_on_readonly_db_open = skip_scan;
}

/*
* Class: org_rocksdb_Options
* Method: skipDirectoryScanOnReadOnlyDbOpen
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_Options_skipDirectoryScanOnReadOnlyDbOpen(
    JNIEnv*, jclass, jlong jhandle) {
  // Read the flag through the native handle and hand it back as a jboolean.
  const bool skip_scan =
      reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle)
          ->skip_directory_scan_on_readonly_db_open;
  return static_cast<jboolean>(skip_scan);
}

/*
* Class: org_rocksdb_Options
* Method: setWalRecoveryMode
Expand Down
7 changes: 7 additions & 0 deletions options/db_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ static std::unordered_map<std::string, OptionTypeInfo>
skip_checking_sst_file_sizes_on_db_open),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"skip_directory_scan_on_readonly_db_open",
{offsetof(struct ImmutableDBOptions,
skip_directory_scan_on_readonly_db_open),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"new_table_reader_for_compaction_inputs",
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
OptionTypeFlags::kNone}},
Expand Down Expand Up @@ -769,6 +774,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
skip_stats_update_on_db_open(options.skip_stats_update_on_db_open),
skip_checking_sst_file_sizes_on_db_open(
options.skip_checking_sst_file_sizes_on_db_open),
skip_directory_scan_on_readonly_db_open(
options.skip_directory_scan_on_readonly_db_open),
wal_recovery_mode(options.wal_recovery_mode),
allow_2pc(options.allow_2pc),
row_cache(options.row_cache),
Expand Down
1 change: 1 addition & 0 deletions options/db_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct ImmutableDBOptions {
uint64_t write_thread_slow_yield_usec;
bool skip_stats_update_on_db_open;
bool skip_checking_sst_file_sizes_on_db_open;
bool skip_directory_scan_on_readonly_db_open;
WALRecoveryMode wal_recovery_mode;
bool allow_2pc;
std::shared_ptr<Cache> row_cache;
Expand Down
2 changes: 2 additions & 0 deletions options/options_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ void BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
immutable_db_options.skip_stats_update_on_db_open;
options.skip_checking_sst_file_sizes_on_db_open =
immutable_db_options.skip_checking_sst_file_sizes_on_db_open;
options.skip_directory_scan_on_readonly_db_open =
immutable_db_options.skip_directory_scan_on_readonly_db_open;
options.wal_recovery_mode = immutable_db_options.wal_recovery_mode;
options.allow_2pc = immutable_db_options.allow_2pc;
options.row_cache = immutable_db_options.row_cache;
Expand Down
17 changes: 17 additions & 0 deletions test_util/testutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,23 @@ class StringFS : public FileSystemWrapper {
std::unordered_map<std::string, std::string> files_;
};

// Test-only FileSystemWrapper whose GetChildren() always fails with
// NotSupported, used to exercise skip_directory_scan_on_readonly_db_open.
// NOTE(review): GetChildren() is the only method overridden here; every other
// call is left to FileSystemWrapper.
class NoReaddirFS : public FileSystemWrapper {
 public:
  explicit NoReaddirFS(const std::shared_ptr<FileSystem>& base)
      : FileSystemWrapper(base) {}

  const char* Name() const override { return kClassName(); }
  static const char* kClassName() { return "NoReaddirFS"; }

  // Rejects the listing unconditionally; none of the arguments are inspected
  // and the output vector is left untouched.
  IOStatus GetChildren(const std::string& /*dir_path*/,
                       const IOOptions& /*io_opts*/,
                       std::vector<std::string>* /*children*/,
                       IODebugContext* /*debug_ctx*/) override {
    IOStatus rejected =
        IOStatus::NotSupported("Directory listing not supported");
    return rejected;
  }
};

// A compressor that essentially implements a custom compression algorithm
// by leveraging an existing compression algorithm and putting a custom header
// on it to detect any attempts to decompress it with the wrong compression
Expand Down