Skip to content

Commit

Permalink
Use user key on sst file for blob verification for Get and MultiGet (…
Browse files Browse the repository at this point in the history
…#11105)

Summary:
Use the user key on sst file for blob verification for `Get` and `MultiGet` instead of the user key passed from caller.

Add tests for `Get` and `MultiGet` operations when user defined timestamp feature is enabled in a BlobDB.

Pull Request resolved: facebook/rocksdb#11105

Test Plan:
make V=1 db_blob_basic_test
./db_blob_basic_test --gtest_filter="DBBlobTestWithTimestamp.*"

Reviewed By: ltamasi

Differential Revision: D42716487

Pulled By: jowlyzhang

fbshipit-source-id: 5987ecbb7e56ddf46d2467a3649369390789506a
  • Loading branch information
jowlyzhang authored and facebook-github-bot committed Jan 30, 2023
1 parent 79e57a3 commit 24ac53d
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 10 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

### Bug Fixes
* Fixed a data race on `ColumnFamilyData::flush_reason` caused by concurrent flushes.
* Fixed an issue in `Get` and `MultiGet` when user-defined timestamps is enabled in combination with BlobDB.

### Feature Removal
* Remove RocksDB Lite.
Expand Down
185 changes: 185 additions & 0 deletions db/blob/db_blob_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/db_test_util.h"
#include "db/db_with_timestamp_test_util.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
#include "utilities/fault_injection_env.h"
Expand Down Expand Up @@ -1771,6 +1772,190 @@ TEST_F(DBBlobBasicTest, WarmCacheWithBlobsSecondary) {
1);
}

class DBBlobWithTimestampTest : public DBBasicTestWithTimestampBase {
protected:
DBBlobWithTimestampTest()
: DBBasicTestWithTimestampBase("db_blob_with_timestamp_test") {}
};

TEST_F(DBBlobWithTimestampTest, GetBlob) {
Options options = GetDefaultOptions();
options.create_if_missing = true;
options.enable_blob_files = true;
options.min_blob_size = 0;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);
WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
constexpr char key[] = "key";
constexpr char blob_value[] = "blob_value";

ASSERT_OK(db_->Put(write_opts, key, ts, blob_value));

ASSERT_OK(Flush());

const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
std::string value;
ASSERT_OK(db_->Get(read_opts, key, &value));
ASSERT_EQ(value, blob_value);
}

TEST_F(DBBlobWithTimestampTest, MultiGetBlobs) {
constexpr size_t min_blob_size = 6;

Options options = GetDefaultOptions();
options.enable_blob_files = true;
options.min_blob_size = min_blob_size;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

// Put then retrieve three key-values. The first value is below the size limit
// and is thus stored inline; the other two are stored separately as blobs.
constexpr size_t num_keys = 3;

constexpr char first_key[] = "first_key";
constexpr char first_value[] = "short";
static_assert(sizeof(first_value) - 1 < min_blob_size,
"first_value too long to be inlined");

DestroyAndReopen(options);
WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
ASSERT_OK(db_->Put(write_opts, first_key, ts, first_value));

constexpr char second_key[] = "second_key";
constexpr char second_value[] = "long_value";
static_assert(sizeof(second_value) - 1 >= min_blob_size,
"second_value too short to be stored as blob");

ASSERT_OK(db_->Put(write_opts, second_key, ts, second_value));

constexpr char third_key[] = "third_key";
constexpr char third_value[] = "other_long_value";
static_assert(sizeof(third_value) - 1 >= min_blob_size,
"third_value too short to be stored as blob");

ASSERT_OK(db_->Put(write_opts, third_key, ts, third_value));

ASSERT_OK(Flush());

ReadOptions read_options;
const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
read_options.timestamp = &read_ts_slice;
std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};

{
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;

db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys, &keys[0],
&values[0], &statuses[0]);

ASSERT_OK(statuses[0]);
ASSERT_EQ(values[0], first_value);

ASSERT_OK(statuses[1]);
ASSERT_EQ(values[1], second_value);

ASSERT_OK(statuses[2]);
ASSERT_EQ(values[2], third_value);
}
}

TEST_F(DBBlobWithTimestampTest, GetMergeBlobWithPut) {
Options options = GetDefaultOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);
ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v2"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v3"));
ASSERT_OK(Flush());

std::string value;
const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
ASSERT_OK(db_->Get(read_opts, "Key1", &value));
ASSERT_EQ(value, "v1,v2,v3");
}

TEST_F(DBBlobWithTimestampTest, MultiGetMergeBlobWithPut) {
constexpr size_t num_keys = 3;

Options options = GetDefaultOptions();
options.merge_operator = MergeOperators::CreateStringAppendOperator();
options.enable_blob_files = true;
options.min_blob_size = 0;
options.create_if_missing = true;
const size_t kTimestampSize = Timestamp(0, 0).size();
TestComparator test_cmp(kTimestampSize);
options.comparator = &test_cmp;

DestroyAndReopen(options);

WriteOptions write_opts;
const std::string ts = Timestamp(1, 0);

ASSERT_OK(db_->Put(write_opts, "Key0", ts, "v0_0"));
ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1_0"));
ASSERT_OK(db_->Put(write_opts, "Key2", ts, "v2_0"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_1"));
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v1_1"));
ASSERT_OK(Flush());
ASSERT_OK(
db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_2"));
ASSERT_OK(Flush());

const std::string read_ts = Timestamp(2, 0);
Slice read_ts_slice(read_ts);
ReadOptions read_opts;
read_opts.timestamp = &read_ts_slice;
std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
std::array<PinnableSlice, num_keys> values;
std::array<Status, num_keys> statuses;

db_->MultiGet(read_opts, db_->DefaultColumnFamily(), num_keys, &keys[0],
&values[0], &statuses[0]);

ASSERT_OK(statuses[0]);
ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");

ASSERT_OK(statuses[1]);
ASSERT_EQ(values[1], "v1_0,v1_1");

ASSERT_OK(statuses[2]);
ASSERT_EQ(values[2], "v2_0");
}

} // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
Expand Down
13 changes: 8 additions & 5 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2194,8 +2194,9 @@ void Version::MultiGetBlob(

key_context.value->Reset();
blob_reqs_in_file.emplace_back(
key_context.ukey_with_ts, blob_index.offset(), blob_index.size(),
blob_index.compression(), key_context.value, key_context.s);
key_context.get_context->ukey_to_get_blob_value(),
blob_index.offset(), blob_index.size(), blob_index.compression(),
key_context.value, key_context.s);
}
if (blob_reqs_in_file.size() > 0) {
const auto file_size = blob_file_meta->GetBlobFileSize();
Expand All @@ -2204,7 +2205,8 @@ void Version::MultiGetBlob(
}

if (blob_reqs.size() > 0) {
blob_source_->MultiGetBlob(read_options, blob_reqs, /*bytes_read=*/nullptr);
blob_source_->MultiGetBlob(read_options, blob_reqs,
/*bytes_read=*/nullptr);
}

for (auto& ctx : blob_ctxs) {
Expand Down Expand Up @@ -2345,8 +2347,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
constexpr uint64_t* bytes_read = nullptr;

*status = GetBlob(read_options, user_key, *value, prefetch_buffer,
value, bytes_read);
*status =
GetBlob(read_options, get_context.ukey_to_get_blob_value(),
*value, prefetch_buffer, value, bytes_read);
if (!status->ok()) {
if (status->IsIncomplete()) {
get_context.MarkKeyMayExist();
Expand Down
11 changes: 7 additions & 4 deletions table/get_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
if (kNotFound == state_) {
state_ = kFound;
if (do_merge_) {
if (type == kTypeBlobIndex && ucmp_->timestamp_size() != 0) {
ukey_with_ts_found_.PinSelf(parsed_key.user_key);
}
if (LIKELY(pinnable_val_ != nullptr)) {
Slice value_to_use = value;

Expand Down Expand Up @@ -339,7 +342,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
// merge_context_->operand_list
if (type == kTypeBlobIndex) {
PinnableSlice pin_val;
if (GetBlobValue(value, &pin_val) == false) {
if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) {
return false;
}
Slice blob_value(pin_val);
Expand All @@ -365,7 +368,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
assert(merge_operator_ != nullptr);
if (type == kTypeBlobIndex) {
PinnableSlice pin_val;
if (GetBlobValue(value, &pin_val) == false) {
if (GetBlobValue(parsed_key.user_key, value, &pin_val) == false) {
return false;
}
Slice blob_value(pin_val);
Expand Down Expand Up @@ -545,13 +548,13 @@ void GetContext::MergeWithEntity(Slice entity) {
}
}

bool GetContext::GetBlobValue(const Slice& blob_index,
bool GetContext::GetBlobValue(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value) {
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
constexpr uint64_t* bytes_read = nullptr;

Status status = blob_fetcher_->FetchBlob(
user_key_, blob_index, prefetch_buffer, blob_value, bytes_read);
user_key, blob_index, prefetch_buffer, blob_value, bytes_read);
if (!status.ok()) {
if (status.IsIncomplete()) {
// FIXME: this code is not covered by unit tests
Expand Down
15 changes: 14 additions & 1 deletion table/get_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,23 @@ class GetContext {

bool has_callback() const { return callback_ != nullptr; }

const Slice& ukey_to_get_blob_value() const {
if (!ukey_with_ts_found_.empty()) {
return ukey_with_ts_found_;
} else {
return user_key_;
}
}

uint64_t get_tracing_get_id() const { return tracing_get_id_; }

void push_operand(const Slice& value, Cleanable* value_pinner);

private:
void Merge(const Slice* value);
void MergeWithEntity(Slice entity);
bool GetBlobValue(const Slice& blob_index, PinnableSlice* blob_value);
bool GetBlobValue(const Slice& user_key, const Slice& blob_index,
PinnableSlice* blob_value);

const Comparator* ucmp_;
const MergeOperator* merge_operator_;
Expand All @@ -194,6 +203,10 @@ class GetContext {

GetState state_;
Slice user_key_;
// When a blob index is found with the user key containing timestamp,
// this copies the corresponding user key on record in the sst file
// and is later used for blob verification.
PinnableSlice ukey_with_ts_found_;
PinnableSlice* pinnable_val_;
PinnableWideColumns* columns_;
std::string* timestamp_;
Expand Down

0 comments on commit 24ac53d

Please sign in to comment.