From 0b90ee74e06cc5a3a30088fa39fde5634148bda1 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Mon, 27 May 2024 13:21:41 +0000 Subject: [PATCH 01/10] crimson/osd/object_context_loader: Fix obc cache existence usage with_head_obc() uses get_cached_obc() to get_or_create obc instances. If the obc exists in cache, get_or_load_obc is called with `existed`=true. The assumption above is wrong. Cache existence (`existed`) only guarantees that the obc instance was created (and inserted) in the obc_registery. However, it does **not** assure that the obc was actually loaded. As obc-loading is now concurrent, it's possible for the first user to only create the obc in cache (without loading yet) and the second concurrent user to assume it was already loaded. With this patch, we verify that the obc was loaded in get_or_load_obc. * make loading-obc concurrent PR: https://github.com/ceph/ceph/pull/55488 Fixes: https://tracker.ceph.com/issues/64206 Fixes: https://tracker.ceph.com/issues/66214 Signed-off-by: Matan Breizman --- src/crimson/osd/object_context.h | 10 +++++++++- src/crimson/osd/object_context_loader.cc | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index e1a3cc9298784..b223693385198 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -117,8 +117,16 @@ class ObjectContext : public ceph::common::intrusive_lru_base< } } + bool is_loaded() const { + return fully_loaded; + } + + bool is_valid() const { + return !invalidated_by_interval_change; + } + bool is_loaded_and_valid() const { - return fully_loaded && !invalidated_by_interval_change; + return is_loaded() && is_valid(); } private: diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index b53cbabd04c91..acf59637e2af9 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -146,7 +146,7 @@ using crimson::common::local_conf; LOG_PREFIX(ObjectContextLoader::get_or_load_obc); auto loaded = load_obc_iertr::make_ready_future(obc); - if (existed) { + if (existed && obc->is_loaded()) { if (!obc->is_loaded_and_valid()) { ERRORDPP( "obc for {} invalid -- fully_loaded={}, " From ccc0001a5dc54aef5b4d4b3f85952f1edaf17fb1 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Mon, 27 May 2024 15:24:00 +0000 Subject: [PATCH 02/10] crimson/osd/object_context: cleanup is_loaded_and_valid Signed-off-by: Matan Breizman --- src/crimson/osd/object_context.h | 4 ---- src/crimson/osd/object_context_loader.cc | 15 ++++++--------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index b223693385198..d651dc912fd52 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -125,10 +125,6 @@ class ObjectContext : public ceph::common::intrusive_lru_base< return !invalidated_by_interval_change; } - bool is_loaded_and_valid() const { - return is_loaded() && is_valid(); - } - private: tri_mutex lock; bool recovery_read_marker = false; diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index acf59637e2af9..d8035b440ea1f 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -144,18 +144,15 @@ using crimson::common::local_conf; bool existed) { LOG_PREFIX(ObjectContextLoader::get_or_load_obc); + DEBUGDPP("{} -- fully_loaded={}, " + "invalidated_by_interval_change={}", + dpp, obc->get_oid(), + obc->fully_loaded, + obc->invalidated_by_interval_change); auto loaded = load_obc_iertr::make_ready_future(obc); if (existed && obc->is_loaded()) { - if (!obc->is_loaded_and_valid()) { - ERRORDPP( - "obc for {} invalid -- fully_loaded={}, " - "invalidated_by_interval_change={}", - dpp, obc->get_oid(), - obc->fully_loaded, obc->invalidated_by_interval_change - ); - } - ceph_assert(obc->is_loaded_and_valid()); + ceph_assert(obc->is_valid()); DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); } else { DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); From e456bd7cdd1ad1cd68026cebedd5b1d9d01001f7 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Tue, 28 May 2024 13:23:10 +0000 Subject: [PATCH 03/10] crimson/osd/object_context: await in-progress loading (per-obc) ``` // obc loading is a concurrent phase. In case this obc is being loaded, // make other useres of this obc to await for the loading to complete. ``` Since we now await for loading to finish (in-case in progress), we can also assert is_loaded(). Fixes: https://tracker.ceph.com/issues/65451 Co-authored-by: Xuehan Xu Signed-off-by: Matan Breizman --- src/crimson/common/interruptible_future.h | 11 ++++++++ src/crimson/osd/object_context.h | 4 +++ src/crimson/osd/object_context_loader.cc | 31 +++++++++++++---------- src/crimson/osd/object_context_loader.h | 3 +++ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/src/crimson/common/interruptible_future.h b/src/crimson/common/interruptible_future.h index 405d9c3c05d26..59bb0be30873b 100644 --- a/src/crimson/common/interruptible_future.h +++ b/src/crimson/common/interruptible_future.h @@ -1230,6 +1230,17 @@ struct interruptor }; } + template + [[gnu::always_inline]] + static auto with_lock(Lock& lock, Func&& func) { + return seastar::with_lock( + lock, + [func=std::move(func), + interrupt_condition=interrupt_cond.interrupt_cond]() mutable { + return call_with_interruption(interrupt_condition, func); + }); + } + template AsyncAction> [[gnu::always_inline]] diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index d651dc912fd52..9aa5d60a80039 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -70,6 +70,10 @@ class ObjectContext : public ceph::common::intrusive_lru_base< using watch_key_t = std::pair; std::map> watchers; + // obc loading is a concurrent phase. In case this obc is being loaded, + // make other users of this obc to await for the loading to complete. + seastar::shared_mutex loading_mutex; + ObjectContext(hobject_t hoid) : obs(std::move(hoid)) {} const hobject_t &get_oid() const { diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index d8035b440ea1f..9af927806d568 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -149,20 +149,23 @@ using crimson::common::local_conf; dpp, obc->get_oid(), obc->fully_loaded, obc->invalidated_by_interval_change); - auto loaded = - load_obc_iertr::make_ready_future(obc); - if (existed && obc->is_loaded()) { - ceph_assert(obc->is_valid()); - DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); - } else { - DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); - loaded = - obc->template with_promoted_lock( - [obc, this] { - return load_obc(obc); - }); - } - return loaded; + return interruptor::with_lock(obc->loading_mutex, + [this, obc, existed, FNAME] { + auto loaded = + load_obc_iertr::make_ready_future(obc); + if (existed) { + ceph_assert(obc->is_valid() && obc->is_loaded()); + DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); + } else { + DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); + loaded = + obc->template with_promoted_lock( + [obc, this] { + return load_obc(obc); + }); + } + return loaded; + }); } ObjectContextLoader::load_obc_iertr::future<> diff --git a/src/crimson/osd/object_context_loader.h b/src/crimson/osd/object_context_loader.h index 77805e11bc167..b51e10caf77e8 100644 --- a/src/crimson/osd/object_context_loader.h +++ b/src/crimson/osd/object_context_loader.h @@ -29,6 +29,9 @@ class ObjectContextLoader { ::crimson::osd::IOInterruptCondition, load_obc_ertr>; + using interruptor = ::crimson::interruptible::interruptor< + ::crimson::osd::IOInterruptCondition>; + using with_obc_func_t = std::function (ObjectContextRef, ObjectContextRef)>; From 5b5d2ea4aebcbbe3fec349377337fe38f5a2f27a Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Tue, 28 May 2024 13:52:08 +0000 Subject: [PATCH 04/10] crimson/osd/object_context_loader: cleanup `loaded` Signed-off-by: Matan Breizman --- src/crimson/osd/object_context_loader.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index 9af927806d568..79185a9460e9b 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -88,8 +88,8 @@ using crimson::common::local_conf; [existed=existed, clone=std::move(clone), func=std::move(func), head=std::move(head), this]() mutable -> load_obc_iertr::future<> { - auto loaded = get_or_load_obc(clone, existed); - return loaded.safe_then_interruptible( + return get_or_load_obc(clone, existed + ).safe_then_interruptible( [func = std::move(func), head=std::move(head)](auto clone) mutable { clone->set_clone_ssc(head->ssc); return std::move(func)(std::move(head), std::move(clone)); @@ -151,20 +151,17 @@ using crimson::common::local_conf; obc->invalidated_by_interval_change); return interruptor::with_lock(obc->loading_mutex, [this, obc, existed, FNAME] { - auto loaded = - load_obc_iertr::make_ready_future(obc); if (existed) { ceph_assert(obc->is_valid() && obc->is_loaded()); DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); + return load_obc_iertr::make_ready_future(obc); } else { DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); - loaded = - obc->template with_promoted_lock( + return obc->template with_promoted_lock( [obc, this] { return load_obc(obc); }); } - return loaded; }); } From 583cb17a308c9c56851719371aac75d08e3f35c8 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Thu, 30 May 2024 11:48:46 +0000 Subject: [PATCH 05/10] crimson/osd/object_context_loader: with_head_obc to log `existed` Signed-off-by: Matan Breizman --- src/crimson/osd/object_context_loader.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index 79185a9460e9b..de147f2ee0e05 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -14,7 +14,8 @@ using crimson::common::local_conf; { LOG_PREFIX(ObjectContextLoader::with_head_obc); auto [obc, existed] = obc_registry.get_cached_obc(oid); - DEBUGDPP("object {}", dpp, obc->get_oid()); + DEBUGDPP("object {} existed {}", + dpp, obc->get_oid(), existed); assert(obc->is_head()); obc->append_to(obc_set_accessing); return obc->with_lock( From 6c5106d134c7a4c308253b8ba04aa66416bf1689 Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Sun, 2 Jun 2024 15:00:37 +0000 Subject: [PATCH 06/10] crimson/common/tri_mutex: add debug logs to be used only for testing Signed-off-by: Matan Breizman --- src/crimson/common/tri_mutex.cc | 43 ++++++++++++++++++++++++++++++++ src/crimson/common/tri_mutex.h | 25 +++++++++++++++++++ src/crimson/osd/object_context.h | 10 +++++--- 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc index f79d256688543..94b0642ded010 100644 --- a/src/crimson/common/tri_mutex.cc +++ b/src/crimson/common/tri_mutex.cc @@ -5,6 +5,9 @@ #include +SET_SUBSYS(osd); +//TODO: SET_SUBSYS(crimson_tri_mutex); + seastar::future<> read_lock::lock() { return static_cast(this)->lock_for_read(); @@ -57,20 +60,28 @@ void excl_lock_from_write::unlock() tri_mutex::~tri_mutex() { + LOG_PREFIX(tri_mutex::~tri_mutex()); + DEBUGDPP("", *this); assert(!is_acquired()); } seastar::future<> tri_mutex::lock_for_read() { + LOG_PREFIX(tri_mutex::lock_for_read()); + DEBUGDPP("", *this); if (try_lock_for_read()) { + DEBUGDPP("lock_for_read successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_read, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::read); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_read() noexcept { + LOG_PREFIX(tri_mutex::try_lock_for_read()); + DEBUGDPP("", *this); if (!writers && !exclusively_used && waiters.empty()) { ++readers; return true; @@ -81,6 +92,8 @@ bool tri_mutex::try_lock_for_read() noexcept void tri_mutex::unlock_for_read() { + LOG_PREFIX(tri_mutex::unlock_for_read()); + DEBUGDPP("", *this); assert(readers > 0); if (--readers == 0) { wake(); @@ -89,6 +102,8 @@ void tri_mutex::unlock_for_read() void tri_mutex::promote_from_read() { + LOG_PREFIX(tri_mutex::promote_from_read()); + DEBUGDPP("", *this); assert(readers == 1); --readers; exclusively_used = true; @@ -96,6 +111,8 @@ void tri_mutex::promote_from_read() void tri_mutex::demote_to_read() { + LOG_PREFIX(tri_mutex::demote_to_read()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; ++readers; @@ -103,15 +120,21 @@ void tri_mutex::demote_to_read() seastar::future<> tri_mutex::lock_for_write() { + LOG_PREFIX(tri_mutex::lock_for_write()); + DEBUGDPP("", *this); if (try_lock_for_write()) { + DEBUGDPP("lock_for_write successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_write, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::write); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_write() noexcept { + LOG_PREFIX(tri_mutex::try_lock_for_write()); + DEBUGDPP("", *this); if (!readers && !exclusively_used) { if (waiters.empty()) { ++writers; @@ -123,6 +146,8 @@ bool tri_mutex::try_lock_for_write() noexcept void tri_mutex::unlock_for_write() { + LOG_PREFIX(tri_mutex::unlock_for_write()); + DEBUGDPP("", *this); assert(writers > 0); if (--writers == 0) { wake(); @@ -131,6 +156,8 @@ void tri_mutex::unlock_for_write() void tri_mutex::promote_from_write() { + LOG_PREFIX(tri_mutex::promote_from_write()); + DEBUGDPP("", *this); assert(writers == 1); --writers; exclusively_used = true; @@ -138,6 +165,8 @@ void tri_mutex::promote_from_write() void tri_mutex::demote_to_write() { + LOG_PREFIX(tri_mutex::demote_to_write()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; ++writers; @@ -146,15 +175,21 @@ void tri_mutex::demote_to_write() // for exclusive users seastar::future<> tri_mutex::lock_for_excl() { + LOG_PREFIX(tri_mutex::lock_for_excl()); + DEBUGDPP("", *this); if (try_lock_for_excl()) { + DEBUGDPP("lock_for_excl, successfully", *this); return seastar::now(); } + DEBUGDPP("can't lock_for_excl, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::exclusive); return waiters.back().pr.get_future(); } bool tri_mutex::try_lock_for_excl() noexcept { + LOG_PREFIX(tri_mutex::try_lock_for_excl()); + DEBUGDPP("", *this); if (readers == 0u && writers == 0u && !exclusively_used) { exclusively_used = true; return true; @@ -165,6 +200,8 @@ bool tri_mutex::try_lock_for_excl() noexcept void tri_mutex::unlock_for_excl() { + LOG_PREFIX(tri_mutex::unlock_for_excl()); + DEBUGDPP("", *this); assert(exclusively_used); exclusively_used = false; wake(); @@ -172,6 +209,8 @@ void tri_mutex::unlock_for_excl() bool tri_mutex::is_acquired() const { + LOG_PREFIX(tri_mutex::is_acquired()); + DEBUGDPP("", *this); if (readers != 0u) { return true; } else if (writers != 0u) { @@ -185,6 +224,8 @@ bool tri_mutex::is_acquired() const void tri_mutex::wake() { + LOG_PREFIX(tri_mutex::wake()); + DEBUGDPP("", *this); assert(!readers && !writers && !exclusively_used); type_t type = type_t::none; while (!waiters.empty()) { @@ -210,7 +251,9 @@ void tri_mutex::wake() default: assert(0); } + // TODO: DEBUGDPP("waking up {} ", *this); waiter.pr.set_value(); waiters.pop_front(); } + DEBUGDPP("no waiters", *this); } diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h index d1c215be27e20..fc825e09e3814 100644 --- a/src/crimson/common/tri_mutex.h +++ b/src/crimson/common/tri_mutex.h @@ -5,6 +5,7 @@ #include #include +#include "crimson/common/log.h" class read_lock { public: @@ -62,6 +63,11 @@ class tri_mutex : private read_lock, { public: tri_mutex() = default; +#ifdef NDEBUG + tri_mutex(const std::string obj_name) : name() {} +#else + tri_mutex(const std::string obj_name) : name(obj_name) {} +#endif ~tri_mutex(); read_lock& for_read() { @@ -120,6 +126,10 @@ class tri_mutex : private read_lock, } } + std::string_view get_name() const{ + return name; + } + private: void wake(); unsigned readers = 0; @@ -139,10 +149,25 @@ class tri_mutex : private read_lock, type_t type; }; seastar::circular_buffer waiters; + const std::string name; friend class read_lock; friend class write_lock; friend class excl_lock; friend class excl_lock_from_read; friend class excl_lock_from_write; friend class excl_lock_from_excl; + friend std::ostream& operator<<(std::ostream &lhs, const tri_mutex &rhs); }; + +inline std::ostream& operator<<(std::ostream& os, const tri_mutex& tm) +{ + os << fmt::format("tri_mutex {} writers {} readers {}" + " exclusively_used {} waiters: {}", + tm.get_name(), tm.get_writers(), tm.get_readers(), + tm.exclusively_used, tm.waiters.size()); + return os; +} + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 9aa5d60a80039..2de28558b5660 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -61,6 +61,10 @@ class ObjectContext : public ceph::common::intrusive_lru_base< ceph::common::intrusive_lru_config< hobject_t, ObjectContext, obc_to_hoid>> { +private: + tri_mutex lock; + bool recovery_read_marker = false; + public: ObjectState obs; SnapSetContextRef ssc; @@ -74,7 +78,8 @@ class ObjectContext : public ceph::common::intrusive_lru_base< // make other users of this obc to await for the loading to complete. seastar::shared_mutex loading_mutex; - ObjectContext(hobject_t hoid) : obs(std::move(hoid)) {} + ObjectContext(hobject_t hoid) : lock(hoid.oid.name), + obs(std::move(hoid)) {} const hobject_t &get_oid() const { return obs.oi.soid; @@ -130,9 +135,6 @@ class ObjectContext : public ceph::common::intrusive_lru_base< } private: - tri_mutex lock; - bool recovery_read_marker = false; - template auto _with_lock(Lock& lock, Func&& func) { Ref obc = this; From 71eef36644e50fdf269f898e707182e72e083eeb Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Sun, 2 Jun 2024 15:17:36 +0000 Subject: [PATCH 07/10] crimson/common/tri_mutex: add waiter_t::waiter_name Signed-off-by: Matan Breizman --- src/crimson/common/tri_mutex.cc | 8 ++++---- src/crimson/common/tri_mutex.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc index 94b0642ded010..698b9b43e174b 100644 --- a/src/crimson/common/tri_mutex.cc +++ b/src/crimson/common/tri_mutex.cc @@ -74,7 +74,7 @@ seastar::future<> tri_mutex::lock_for_read() return seastar::now(); } DEBUGDPP("can't lock_for_read, adding to waiters", *this); - waiters.emplace_back(seastar::promise<>(), type_t::read); + waiters.emplace_back(seastar::promise<>(), type_t::read, name); return waiters.back().pr.get_future(); } @@ -127,7 +127,7 @@ seastar::future<> tri_mutex::lock_for_write() return seastar::now(); } DEBUGDPP("can't lock_for_write, adding to waiters", *this); - waiters.emplace_back(seastar::promise<>(), type_t::write); + waiters.emplace_back(seastar::promise<>(), type_t::write, name); return waiters.back().pr.get_future(); } @@ -182,7 +182,7 @@ seastar::future<> tri_mutex::lock_for_excl() return seastar::now(); } DEBUGDPP("can't lock_for_excl, adding to waiters", *this); - waiters.emplace_back(seastar::promise<>(), type_t::exclusive); + waiters.emplace_back(seastar::promise<>(), type_t::exclusive, name); return waiters.back().pr.get_future(); } @@ -251,7 +251,7 @@ void tri_mutex::wake() default: assert(0); } - // TODO: DEBUGDPP("waking up {} ", *this); + DEBUGDPP("waking up {}", *this, waiter.waiter_name); waiter.pr.set_value(); waiters.pop_front(); } diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h index fc825e09e3814..285d4b3c168fe 100644 --- a/src/crimson/common/tri_mutex.h +++ b/src/crimson/common/tri_mutex.h @@ -142,11 +142,12 @@ class tri_mutex : private read_lock, none, }; struct waiter_t { - waiter_t(seastar::promise<>&& pr, type_t type) + waiter_t(seastar::promise<>&& pr, type_t type, std::string_view waiter_name) : pr(std::move(pr)), type(type) {} seastar::promise<> pr; type_t type; + std::string_view waiter_name; }; seastar::circular_buffer waiters; const std::string name; From 251b9d4bca14c314b467fd77202e76a70ed90c0f Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 3 Jun 2024 14:31:35 +0800 Subject: [PATCH 08/10] crimson/common/tri_mutex: minor cleanup to be consistent Signed-off-by: Yingxin Cheng Signed-off-by: Matan Breizman --- src/crimson/common/tri_mutex.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc index 698b9b43e174b..9576f8df6bca7 100644 --- a/src/crimson/common/tri_mutex.cc +++ b/src/crimson/common/tri_mutex.cc @@ -85,9 +85,8 @@ bool tri_mutex::try_lock_for_read() noexcept if (!writers && !exclusively_used && waiters.empty()) { ++readers; return true; - } else { - return false; } + return false; } void tri_mutex::unlock_for_read() @@ -135,11 +134,9 @@ bool tri_mutex::try_lock_for_write() noexcept { LOG_PREFIX(tri_mutex::try_lock_for_write()); DEBUGDPP("", *this); - if (!readers && !exclusively_used) { - if (waiters.empty()) { - ++writers; - return true; - } + if (!readers && !exclusively_used && waiters.empty()) { + ++writers; + return true; } return false; } From f63d76a2ae348e1419dab20fab9557ce483080ca Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 3 Jun 2024 14:33:26 +0800 Subject: [PATCH 09/10] crimson/common/tri_mutex: make lock() atomic if doesn't need wait Otherwise, promotion cannot be atomic with the 1st locker. Identified by: Matan Breizman Signed-off-by: Yingxin Cheng --- src/crimson/common/tri_mutex.cc | 24 +++++++++++++++--------- src/crimson/common/tri_mutex.h | 14 ++++++++------ src/crimson/osd/object_context.h | 19 +++++++++++++++---- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/crimson/common/tri_mutex.cc b/src/crimson/common/tri_mutex.cc index 9576f8df6bca7..541ebfcf810b7 100644 --- a/src/crimson/common/tri_mutex.cc +++ b/src/crimson/common/tri_mutex.cc @@ -8,7 +8,8 @@ SET_SUBSYS(osd); //TODO: SET_SUBSYS(crimson_tri_mutex); -seastar::future<> read_lock::lock() +std::optional> +read_lock::lock() { return static_cast(this)->lock_for_read(); } @@ -18,7 +19,8 @@ void read_lock::unlock() static_cast(this)->unlock_for_read(); } -seastar::future<> write_lock::lock() +std::optional> +write_lock::lock() { return static_cast(this)->lock_for_write(); } @@ -28,7 +30,8 @@ void write_lock::unlock() static_cast(this)->unlock_for_write(); } -seastar::future<> excl_lock::lock() +std::optional> +excl_lock::lock() { return static_cast(this)->lock_for_excl(); } @@ -65,13 +68,14 @@ tri_mutex::~tri_mutex() assert(!is_acquired()); } -seastar::future<> tri_mutex::lock_for_read() +std::optional> +tri_mutex::lock_for_read() { LOG_PREFIX(tri_mutex::lock_for_read()); DEBUGDPP("", *this); if (try_lock_for_read()) { DEBUGDPP("lock_for_read successfully", *this); - return seastar::now(); + return std::nullopt; } DEBUGDPP("can't lock_for_read, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::read, name); @@ -117,13 +121,14 @@ void tri_mutex::demote_to_read() ++readers; } -seastar::future<> tri_mutex::lock_for_write() +std::optional> +tri_mutex::lock_for_write() { LOG_PREFIX(tri_mutex::lock_for_write()); DEBUGDPP("", *this); if (try_lock_for_write()) { DEBUGDPP("lock_for_write successfully", *this); - return seastar::now(); + return std::nullopt; } DEBUGDPP("can't lock_for_write, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::write, name); @@ -170,13 +175,14 @@ void tri_mutex::demote_to_write() } // for exclusive users -seastar::future<> tri_mutex::lock_for_excl() +std::optional> +tri_mutex::lock_for_excl() { LOG_PREFIX(tri_mutex::lock_for_excl()); DEBUGDPP("", *this); if (try_lock_for_excl()) { DEBUGDPP("lock_for_excl, successfully", *this); - return seastar::now(); + return std::nullopt; } DEBUGDPP("can't lock_for_excl, adding to waiters", *this); waiters.emplace_back(seastar::promise<>(), type_t::exclusive, name); diff --git a/src/crimson/common/tri_mutex.h b/src/crimson/common/tri_mutex.h index 285d4b3c168fe..d2e83d876437c 100644 --- a/src/crimson/common/tri_mutex.h +++ b/src/crimson/common/tri_mutex.h @@ -3,25 +3,27 @@ #pragma once +#include + #include #include #include "crimson/common/log.h" class read_lock { public: - seastar::future<> lock(); + std::optional> lock(); void unlock(); }; class write_lock { public: - seastar::future<> lock(); + std::optional> lock(); void unlock(); }; class excl_lock { public: - seastar::future<> lock(); + std::optional> lock(); void unlock(); }; @@ -87,7 +89,7 @@ class tri_mutex : private read_lock, } // for shared readers - seastar::future<> lock_for_read(); + std::optional> lock_for_read(); bool try_lock_for_read() noexcept; void unlock_for_read(); void promote_from_read(); @@ -97,7 +99,7 @@ class tri_mutex : private read_lock, } // for shared writers - seastar::future<> lock_for_write(); + std::optional> lock_for_write(); bool try_lock_for_write() noexcept; void unlock_for_write(); void promote_from_write(); @@ -107,7 +109,7 @@ class tri_mutex : private read_lock, } // for exclusive users - seastar::future<> lock_for_excl(); + std::optional> lock_for_excl(); bool try_lock_for_excl() noexcept; void unlock_for_excl(); bool is_excl_acquired() const { diff --git a/src/crimson/osd/object_context.h b/src/crimson/osd/object_context.h index 2de28558b5660..466f22b8372f3 100644 --- a/src/crimson/osd/object_context.h +++ b/src/crimson/osd/object_context.h @@ -138,10 +138,21 @@ class ObjectContext : public ceph::common::intrusive_lru_base< template auto _with_lock(Lock& lock, Func&& func) { Ref obc = this; - return lock.lock().then([&lock, func = std::forward(func), obc]() mutable { - return seastar::futurize_invoke(func).finally([&lock, obc] { - lock.unlock(); - }); + auto maybe_fut = lock.lock(); + return seastar::futurize_invoke([ + maybe_fut=std::move(maybe_fut), + func=std::forward(func)]() mutable { + if (maybe_fut) { + return std::move(*maybe_fut + ).then([func=std::forward(func)]() mutable { + return seastar::futurize_invoke(func); + }); + } else { + // atomically calling func upon locking + return seastar::futurize_invoke(func); + } + }).finally([&lock, obc] { + lock.unlock(); }); } From 1675ce8c1b5347ad13b65389686cd45853a4149e Mon Sep 17 00:00:00 2001 From: Matan Breizman Date: Thu, 6 Jun 2024 09:48:09 +0000 Subject: [PATCH 10/10] crimson/osd/object_context_loader: get_or_load to support atomicity make use of try_lock in order to support atomicity when called in ObjectContext::_with_lock() Co-authored-by: Yingxin Cheng Signed-off-by: Matan Breizman --- src/crimson/osd/object_context_loader.cc | 54 ++++++++++++++++++------ src/crimson/osd/object_context_loader.h | 12 ++++++ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/src/crimson/osd/object_context_loader.cc b/src/crimson/osd/object_context_loader.cc index de147f2ee0e05..3f3f78964c439 100644 --- a/src/crimson/osd/object_context_loader.cc +++ b/src/crimson/osd/object_context_loader.cc @@ -150,20 +150,46 @@ using crimson::common::local_conf; dpp, obc->get_oid(), obc->fully_loaded, obc->invalidated_by_interval_change); - return interruptor::with_lock(obc->loading_mutex, - [this, obc, existed, FNAME] { - if (existed) { - ceph_assert(obc->is_valid() && obc->is_loaded()); - DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); - return load_obc_iertr::make_ready_future(obc); - } else { - DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); - return obc->template with_promoted_lock( - [obc, this] { - return load_obc(obc); - }); - } - }); + if (existed) { + // obc is already loaded - avoid loading_mutex usage + DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); + return get_obc(obc, existed); + } + // See ObjectContext::_with_lock(), + // this function must be able to support atomicity before + // acquiring the lock + if (obc->loading_mutex.try_lock()) { + return _get_or_load_obc(obc, existed + ).finally([obc]{ + obc->loading_mutex.unlock(); + }); + } else { + return interruptor::with_lock(obc->loading_mutex, + [this, obc, existed, FNAME] { + // Previous user already loaded the obc + DEBUGDPP("{} finished waiting for loader, cache hit on {}", + dpp, FNAME, obc->get_oid()); + return get_obc(obc, existed); + }); + } + } + + template + ObjectContextLoader::load_obc_iertr::future + ObjectContextLoader::_get_or_load_obc(ObjectContextRef obc, + bool existed) + { + LOG_PREFIX(ObjectContextLoader::_get_or_load_obc); + if (existed) { + DEBUGDPP("cache hit on {}", dpp, obc->get_oid()); + return get_obc(obc, existed); + } else { + DEBUGDPP("cache miss on {}", dpp, obc->get_oid()); + return obc->template with_promoted_lock( + [obc, this] { + return load_obc(obc); + }); + } } ObjectContextLoader::load_obc_iertr::future<> diff --git a/src/crimson/osd/object_context_loader.h b/src/crimson/osd/object_context_loader.h index b51e10caf77e8..80cea787630d6 100644 --- a/src/crimson/osd/object_context_loader.h +++ b/src/crimson/osd/object_context_loader.h @@ -80,6 +80,18 @@ class ObjectContextLoader { get_or_load_obc(ObjectContextRef obc, bool existed); + template + load_obc_iertr::future + _get_or_load_obc(ObjectContextRef obc, + bool existed); + + static inline load_obc_iertr::future + get_obc(ObjectContextRef obc, + bool existed) { + ceph_assert(existed && obc->is_valid() && obc->is_loaded()); + return load_obc_iertr::make_ready_future(obc); + } + load_obc_iertr::future load_obc(ObjectContextRef obc); };