From 57eddebb9ed723eb701ad59430c37c37f1a9879e Mon Sep 17 00:00:00 2001 From: Harsha Rajendran Date: Fri, 18 Mar 2022 09:15:29 -0400 Subject: [PATCH] Fixes #15691 (#16294) Added logic to have a max retry count for providers returning a BUSY state. Added logic not to traverse the default ota list more than once only in the case of QueryImageStatus of NOT_AVAILABLE. Cleared the last used provider on reboot to ensure determinism in provider-selection. --- scripts/tests/ota_test.sh | 14 +++-- .../GenericOTARequestorDriver.cpp | 61 ++++++++++++++----- .../ota-requestor/GenericOTARequestorDriver.h | 5 +- .../clusters/ota-requestor/OTARequestor.cpp | 5 +- src/app/clusters/ota-requestor/OTARequestor.h | 6 +- .../ota-requestor/OTARequestorDriver.h | 4 +- 6 files changed, 70 insertions(+), 25 deletions(-) diff --git a/scripts/tests/ota_test.sh b/scripts/tests/ota_test.sh index 2838c106b7e11b..eecded2912c86a 100755 --- a/scripts/tests/ota_test.sh +++ b/scripts/tests/ota_test.sh @@ -65,10 +65,16 @@ timeout 30 grep -q "OTA image downloaded to" <(tail -n0 -f /tmp/ota/requestor-lo echo "Exiting, logs are in tmp/ota/" -killall -e "$OTA_PROVIDER_APP" "$OTA_REQUESTOR_APP" - if cmp "$OTA_DOWNLOAD_PATH" "$FIRMWARE_BIN"; then - echo Test passed && exit 0 + TEST_RESULT="Test passed" + RETURN_VALUE=0 else - echo Test failed && exit 1 + TEST_RESULT="Test failed" + RETURN_VALUE=1 fi + +killall -e "$OTA_PROVIDER_APP" "$OTA_REQUESTOR_APP" +rm -f "$FIRMWARE_OTA" "$FIRMWARE_BIN" "$OTA_DOWNLOAD_PATH" + +echo "$TEST_RESULT" +exit "$RETURN_VALUE" diff --git a/src/app/clusters/ota-requestor/GenericOTARequestorDriver.cpp b/src/app/clusters/ota-requestor/GenericOTARequestorDriver.cpp index f278dfb1ab40e2..7f0cda72eeaa3f 100644 --- a/src/app/clusters/ota-requestor/GenericOTARequestorDriver.cpp +++ b/src/app/clusters/ota-requestor/GenericOTARequestorDriver.cpp @@ -23,11 +23,14 @@ // // This particular implementation of the OTARequestorDriver makes the following choices: // - Only a single timer can be active at any given moment -// - The default provider timer is running if and only if there is no update in progress (the OTARequestor +// - The periodic query timer is running if and only if there is no update in progress (the OTARequestor // UpdateState is kIdle) // - AnnounceOTAProviders command is ignored if an update is in progress // - The provider location passed in AnnounceOTAProviders is used in a single query (possibly retried) and then discarded // - Explicitly triggering a query through TriggerImmediateQuery() cancels any in-progress update +// - A QueryImage call results in the driver iterating through the list of default OTA providers, from beginning to the end, until a +// provider successfully transfers the OTA image. If a provider is busy, it will be retried a set number of times before moving +// to the next available one. If all else fails, the periodic query timer is kicked off again. #include #include @@ -55,8 +58,9 @@ GenericOTARequestorDriver * ToDriver(void * context) void GenericOTARequestorDriver::Init(OTARequestorInterface * requestor, OTAImageProcessorInterface * processor) { - mRequestor = requestor; - mImageProcessor = processor; + mRequestor = requestor; + mImageProcessor = processor; + mProviderRetryCount = 0; if (mImageProcessor->IsFirstImageRun()) { @@ -117,7 +121,7 @@ void GenericOTARequestorDriver::HandleIdleState(IdleStateReason reason) break; case IdleStateReason::kInvalidSession: // An invalid session is detected which may be temporary so try to query the same provider again - DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); }); + SendQueryImage(); break; } } @@ -146,14 +150,21 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst break; case UpdateNotFoundReason::kBusy: willTryAnotherQuery = true; - break; + if (mProviderRetryCount <= kMaxBusyProviderRetryCount) + { + break; + } + ChipLogProgress(SoftwareUpdate, "Max Busy Provider retries reached. Attempting to get next Provider."); + __attribute__((fallthrough)); // fallthrough case UpdateNotFoundReason::kNotAvailable: { // IMPLEMENTATION CHOICE: // This implementation schedules a query only if a different provider is available - Optional lastUsedProvider; - mRequestor->GetProviderLocation(lastUsedProvider); - if ((GetNextProviderLocation(providerLocation) != true) || - (lastUsedProvider.HasValue() && ProviderLocationsEqual(providerLocation, lastUsedProvider.Value()))) + // Note that the "listExhausted" being set to TRUE, implies that the entire list of + // defaultOTAProviders has been traversed. On bootup, the last provider is reset + // which ensures that every QueryImage call will ensure that the list is traversed from + // start to end, until an OTA is successfully completed. + bool listExhausted = false; + if ((GetNextProviderLocation(providerLocation, listExhausted) != true) || (listExhausted == true)) { willTryAnotherQuery = false; } @@ -164,9 +175,6 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst } break; } - default: - willTryAnotherQuery = false; - break; } if (delay < kDefaultDelayedActionTime) @@ -182,6 +190,7 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst else { ChipLogProgress(SoftwareUpdate, "UpdateNotFound, not scheduling further retries"); + StartDefaultProviderTimer(); } } @@ -295,7 +304,22 @@ void GenericOTARequestorDriver::ProcessAnnounceOTAProviders( void GenericOTARequestorDriver::SendQueryImage() { - + Optional lastUsedProvider; + mRequestor->GetProviderLocation(lastUsedProvider); + if (!lastUsedProvider.HasValue()) + { + ProviderLocationType providerLocation; + bool listExhausted = false; + if (GetNextProviderLocation(providerLocation, listExhausted) == true) + { + mRequestor->SetCurrentProviderLocation(providerLocation); + } + else + { + ChipLogProgress(SoftwareUpdate, "No provider available"); + return; + } + } // IMPLEMENTATION CHOICE // In this implementation explicitly triggering a query cancels any in-progress update. UpdateCancelled(); @@ -304,6 +328,8 @@ void GenericOTARequestorDriver::SendQueryImage() // TriggerImmediateQueryInternal() will cause the state to change from kIdle StopDefaultProviderTimer(); + mProviderRetryCount++; + DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); }); } @@ -313,7 +339,8 @@ void GenericOTARequestorDriver::DefaultProviderTimerHandler(System::Layer * syst // Determine which provider to query next ProviderLocationType providerLocation; - if (GetNextProviderLocation(providerLocation) != true) + bool listExhausted = false; + if (GetNextProviderLocation(providerLocation, listExhausted) != true) { StartDefaultProviderTimer(); return; @@ -328,7 +355,6 @@ void GenericOTARequestorDriver::StartDefaultProviderTimer() { ChipLogProgress(SoftwareUpdate, "Starting the Default Provider timer, timeout: %u seconds", (unsigned int) mPeriodicQueryTimeInterval); - ScheduleDelayedAction( System::Clock::Seconds32(mPeriodicQueryTimeInterval), [](System::Layer *, void * context) { @@ -352,10 +378,12 @@ void GenericOTARequestorDriver::StopDefaultProviderTimer() * circular list and return the next value (based on the last used provider). If the list of DefaultOtaProviders is empty, FALSE is * returned. */ -bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & providerLocation) +bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted) { Optional lastUsedProvider; mRequestor->GetProviderLocation(lastUsedProvider); + mProviderRetryCount = 0; // Reset provider retry count + listExhausted = false; // Iterate through the default providers list and find the last used provider. If found, return the provider after it auto iterator = mRequestor->GetDefaultOTAProviderListIterator(); @@ -376,6 +404,7 @@ bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & p if (iterator.Next()) { providerLocation = iterator.GetValue(); + listExhausted = true; return true; } else diff --git a/src/app/clusters/ota-requestor/GenericOTARequestorDriver.h b/src/app/clusters/ota-requestor/GenericOTARequestorDriver.h index 5fe392fd4e6516..5b2e51762ea0c3 100644 --- a/src/app/clusters/ota-requestor/GenericOTARequestorDriver.h +++ b/src/app/clusters/ota-requestor/GenericOTARequestorDriver.h @@ -68,7 +68,7 @@ class GenericOTARequestorDriver : public OTARequestorDriver void ProcessAnnounceOTAProviders(const ProviderLocationType & providerLocation, app::Clusters::OtaSoftwareUpdateRequestor::OTAAnnouncementReason announcementReason) override; void SendQueryImage() override; - bool GetNextProviderLocation(ProviderLocationType & providerLocation) override; + bool GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted) override; protected: void StartDefaultProviderTimer(); @@ -82,6 +82,9 @@ class GenericOTARequestorDriver : public OTARequestorDriver OTAImageProcessorInterface * mImageProcessor = nullptr; uint32_t mOtaStartDelaySec = 0; uint32_t mPeriodicQueryTimeInterval = (24 * 60 * 60); // Timeout for querying providers on the default OTA provider list + // Maximum number of times to retry a BUSY OTA provider before moving to the next available one + static constexpr uint8_t kMaxBusyProviderRetryCount = 3; + uint8_t mProviderRetryCount; // Track retry count for the current provider }; } // namespace DeviceLayer diff --git a/src/app/clusters/ota-requestor/OTARequestor.cpp b/src/app/clusters/ota-requestor/OTARequestor.cpp index 29b8efc03a3234..059c590f035fbd 100644 --- a/src/app/clusters/ota-requestor/OTARequestor.cpp +++ b/src/app/clusters/ota-requestor/OTARequestor.cpp @@ -475,7 +475,8 @@ void OTARequestor::TriggerImmediateQueryInternal() OTARequestorInterface::OTATriggerResult OTARequestor::TriggerImmediateQuery() { ProviderLocationType providerLocation; - if (mOtaRequestorDriver->GetNextProviderLocation(providerLocation) != true) + bool listExhausted = false; + if (mOtaRequestorDriver->GetNextProviderLocation(providerLocation, listExhausted) != true) { ChipLogError(SoftwareUpdate, "No OTA Providers available"); return kNoProviderKnown; @@ -787,6 +788,8 @@ CHIP_ERROR OTARequestor::SendNotifyUpdateAppliedRequest(OperationalDeviceProxy & Controller::OtaSoftwareUpdateProviderCluster cluster; cluster.Associate(&deviceProxy, mProviderLocation.Value().endpoint); + mProviderLocation.ClearValue(); // Clearing the last used provider location to start afresh on reboot + return cluster.InvokeCommand(args, this, OnNotifyUpdateAppliedResponse, OnNotifyUpdateAppliedFailure); } diff --git a/src/app/clusters/ota-requestor/OTARequestor.h b/src/app/clusters/ota-requestor/OTARequestor.h index 47ca0fe7c4bd9e..7ac6d08a0bae8b 100644 --- a/src/app/clusters/ota-requestor/OTARequestor.h +++ b/src/app/clusters/ota-requestor/OTARequestor.h @@ -130,7 +130,6 @@ class OTARequestor : public OTARequestorInterface, public BDXDownloader::StateDe mUpdateToken = updateToken; } - // Schedule the initializations that needs to be performed in the CHIP context DeviceLayer::PlatformMgr().ScheduleWork(InitState, reinterpret_cast(this)); @@ -333,7 +332,10 @@ class OTARequestor : public OTARequestorInterface, public BDXDownloader::StateDe OTAUpdateStateEnum mCurrentUpdateState = OTAUpdateStateEnum::kUnknown; Server * mServer = nullptr; ProviderLocationList mDefaultOtaProviderList; - Optional mProviderLocation; // Provider location used for the current/last update in progress + // Provider location used for the current/last update in progress. Note that on reboot, this value will be read from the + // persistent storage (if available), used for sending the NotifyApplied message, and then cleared. This will ensure determinism + // in the OTARequestorDriver on reboot. + Optional mProviderLocation; }; } // namespace chip diff --git a/src/app/clusters/ota-requestor/OTARequestorDriver.h b/src/app/clusters/ota-requestor/OTARequestorDriver.h index 40c8af5a37d399..fe2e52fabb3788 100644 --- a/src/app/clusters/ota-requestor/OTARequestorDriver.h +++ b/src/app/clusters/ota-requestor/OTARequestorDriver.h @@ -128,7 +128,9 @@ class OTARequestorDriver // Driver picks the OTA Provider that should be used for the next query and update. The Provider is picked according to // the driver's internal logic such as, for example, traversing the default providers list. // Returns true if there is a Provider available for the next query, returns false otherwise. - virtual bool GetNextProviderLocation(ProviderLocationType & providerLocation) = 0; + // [in] listExhausted - set to TRUE if the list of providers has been traversed until the end and has looped + // back to the beginning. + virtual bool GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted) = 0; }; } // namespace chip