diff --git a/src/app/OperationalSessionSetup.cpp b/src/app/OperationalSessionSetup.cpp index 62551d66c22247..2e9d572a18618e 100644 --- a/src/app/OperationalSessionSetup.cpp +++ b/src/app/OperationalSessionSetup.cpp @@ -53,6 +53,14 @@ void OperationalSessionSetup::MoveToState(State aTargetState) ChipLogDetail(Discovery, "OperationalSessionSetup[%u:" ChipLogFormatX64 "]: State change %d --> %d", mPeerId.GetFabricIndex(), ChipLogValueX64(mPeerId.GetNodeId()), to_underlying(mState), to_underlying(aTargetState)); + +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + if (mState == State::WaitingForRetry) + { + CancelSessionSetupReattempt(); + } +#endif + mState = aTargetState; if (aTargetState != State::Connecting) @@ -64,7 +72,9 @@ void OperationalSessionSetup::MoveToState(State aTargetState) bool OperationalSessionSetup::AttachToExistingSecureSession() { - VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress, false); + VerifyOrReturnError(mState == State::NeedsAddress || mState == State::ResolvingAddress || mState == State::HasAddress || + mState == State::WaitingForRetry, + false); auto sessionHandle = mInitParams.sessionManager->FindSecureSessionForNode(mPeerId, MakeOptional(Transport::SecureSession::Type::kCASE)); @@ -119,6 +129,7 @@ void OperationalSessionSetup::Connect(Callback::Callback * on break; case State::ResolvingAddress: + case State::WaitingForRetry: isConnected = AttachToExistingSecureSession(); break; @@ -320,20 +331,27 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error) { #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Make a copy of the ReliableMessageProtocolConfig, since our - // mCaseClient is about to go away. + // mCaseClient is about to go away once we change state. ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals(); #endif + // Move to the ResolvingAddress state, in case we have more results, + // since we expect to receive results in that state. + MoveToState(State::ResolvingAddress); if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) { #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES - // Our retry is going to be immediate, once the event loop spins. + // Our retry has already been kicked off. NotifyRetryHandlers(error, remoteMprConfig, System::Clock::kZero); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES - MoveToState(State::ResolvingAddress); return; } + // Moving back to the Connecting state would be a bit of a lie, since we + // don't have an mCASEClient. Just go back to NeedsAddress, since + // that's really where we are now. + MoveToState(State::NeedsAddress); + #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mRemainingAttempts > 0) { @@ -341,6 +359,7 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error) CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay); if (err == CHIP_NO_ERROR) { + MoveToState(State::WaitingForRetry); NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay); return; } @@ -406,6 +425,10 @@ OperationalSessionSetup::~OperationalSessionSetup() // Make sure we don't leak it. mClientPool->Release(mCASEClient); } + +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + CancelSessionSetupReattempt(); +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES } CHIP_ERROR OperationalSessionSetup::LookupPeerAddress() @@ -553,27 +576,32 @@ CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock: return err; } +void OperationalSessionSetup::CancelSessionSetupReattempt() +{ + // If we can't get a system layer, there is no way for us to cancel things + // at this point, but hopefully that's because everything is torn down + // anyway and hence the timer will not fire. + auto * sessionManager = mInitParams.exchangeMgr->GetSessionManager(); + VerifyOrReturn(sessionManager != nullptr); + + auto * systemLayer = sessionManager->SystemLayer(); + VerifyOrReturn(systemLayer != nullptr); + + systemLayer->CancelTimer(TrySetupAgain, this); +} + void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * state) { auto * self = static_cast(state); - CHIP_ERROR err = CHIP_NO_ERROR; - - if (self->mState != State::NeedsAddress) - { - err = CHIP_ERROR_INCORRECT_STATE; - } - else + self->MoveToState(State::ResolvingAddress); + CHIP_ERROR err = self->LookupPeerAddress(); + if (err == CHIP_NO_ERROR) { - self->MoveToState(State::ResolvingAddress); - err = self->LookupPeerAddress(); - if (err == CHIP_NO_ERROR) - { - return; - } + return; } - // Give up; we're either in a bad state or could not start a lookup. + // Give up; we could not start a lookup. self->DequeueConnectionCallbacks(err); // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } diff --git a/src/app/OperationalSessionSetup.h b/src/app/OperationalSessionSetup.h index cb3f624ba6724e..60f0f0ecaba5b7 100644 --- a/src/app/OperationalSessionSetup.h +++ b/src/app/OperationalSessionSetup.h @@ -253,6 +253,8 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate, HasAddress, // Have an address, CASE handshake not started yet. Connecting, // CASE handshake in progress. SecureConnected, // CASE session established. + WaitingForRetry, // No address known, but a retry is pending. Added at + // end to make logs easier to understand. }; CASEClientInitParams mInitParams; @@ -335,6 +337,12 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate, */ CHIP_ERROR ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay); + /** + * Cancel a scheduled setup reattempt, if we can (i.e. if we still have + * access to the SystemLayer). + */ + void CancelSessionSetupReattempt(); + /** * Helper for our backoff retry timer. */ diff --git a/src/lib/address_resolve/AddressResolve.h b/src/lib/address_resolve/AddressResolve.h index 11c0959e3357df..8034aed7c1f28a 100644 --- a/src/lib/address_resolve/AddressResolve.h +++ b/src/lib/address_resolve/AddressResolve.h @@ -204,19 +204,30 @@ class Resolver /// in progress) virtual CHIP_ERROR LookupNode(const NodeLookupRequest & request, Impl::NodeLookupHandle & handle) = 0; - /// Inform the Lookup handle that the previous node lookup was not sufficient - /// for the purpose of the caller (e.g establishing a session fails with the - /// result of the previous lookup), and that more data is needed. + /// Inform the Lookup handle that the previous node lookup was not + /// sufficient for the purpose of the caller (e.g establishing a session + /// fails with the result of the previous lookup), and that more data is + /// needed. /// - /// If this returns CHIP_NO_ERROR, the following is expected: - /// - The listener OnNodeAddressResolved will be called with the additional data. - /// - handle must NOT be destroyed while the lookup is in progress (it - /// is part of an internal 'lookup list') - /// - handle must NOT be reused (the lookup is done on a per-node basis - /// and maintains lookup data internally while the operation is still - /// in progress) + /// This method must be called on a handle that is no longer active to + /// succeed. + /// + /// If the handle is no longer active and has results that have not been + /// delivered to the listener yet, the listener's OnNodeAddressResolved will + /// be called synchronously before the method returns. Note that depending + /// on the listener implementation this can end up destroying the handle + /// and/or the listener. + /// + /// This method will return CHIP_NO_ERROR if and only if it has called + /// OnNodeAddressResolved. + /// + /// This method will return CHIP_ERROR_INCORRECT_STATE if the handle is + /// still active. + /// + /// This method will return CHIP_ERROR_WELL_EMPTY if there are no more + /// results. /// - /// If no additional data is available at the time of the request, it returns CHIP_ERROR_WELL_EMPTY. + /// This method may return other errors in some cases. virtual CHIP_ERROR TryNextResult(Impl::NodeLookupHandle & handle) = 0; /// Stops an active lookup request. diff --git a/src/lib/address_resolve/AddressResolve_DefaultImpl.cpp b/src/lib/address_resolve/AddressResolve_DefaultImpl.cpp index a6b1bbde423dfb..10e50cbd734cfe 100644 --- a/src/lib/address_resolve/AddressResolve_DefaultImpl.cpp +++ b/src/lib/address_resolve/AddressResolve_DefaultImpl.cpp @@ -187,20 +187,14 @@ CHIP_ERROR Resolver::LookupNode(const NodeLookupRequest & request, Impl::NodeLoo CHIP_ERROR Resolver::TryNextResult(Impl::NodeLookupHandle & handle) { - VerifyOrReturnError(mSystemLayer != nullptr, CHIP_ERROR_INCORRECT_STATE); VerifyOrReturnError(!mActiveLookups.Contains(&handle), CHIP_ERROR_INCORRECT_STATE); VerifyOrReturnError(handle.HasLookupResult(), CHIP_ERROR_WELL_EMPTY); - return mSystemLayer->ScheduleWork(&OnTryNextResult, static_cast(&handle)); -} - -void Resolver::OnTryNextResult(System::Layer * layer, void * context) -{ - auto handle = static_cast(context); - auto listener = handle->GetListener(); - auto peerId = handle->GetRequest().GetPeerId(); - auto result = handle->TakeLookupResult(); + auto listener = handle.GetListener(); + auto peerId = handle.GetRequest().GetPeerId(); + auto result = handle.TakeLookupResult(); listener->OnNodeAddressResolved(peerId, result); + return CHIP_NO_ERROR; } CHIP_ERROR Resolver::CancelLookup(Impl::NodeLookupHandle & handle, FailureCallback cancel_method) diff --git a/src/lib/address_resolve/AddressResolve_DefaultImpl.h b/src/lib/address_resolve/AddressResolve_DefaultImpl.h index a70e3466326716..ac6d7dd8d1f4d0 100644 --- a/src/lib/address_resolve/AddressResolve_DefaultImpl.h +++ b/src/lib/address_resolve/AddressResolve_DefaultImpl.h @@ -183,7 +183,6 @@ class Resolver : public ::chip::AddressResolve::Resolver, public Dnssd::Operatio private: static void OnResolveTimer(System::Layer * layer, void * context) { static_cast(context)->HandleTimer(); } - static void OnTryNextResult(System::Layer * layer, void * context); /// Timer on lookup node events: min and max search times. void HandleTimer();