Skip to content

Commit

Permalink
Fixes #15691 (#16294)
Browse files Browse the repository at this point in the history
Added logic to have a max retry count for providers returning a BUSY state.
Added logic not to traverse the default ota list more than once only in the case of QueryImageStatus of NOT_AVAILABLE.
Cleared the last used provider on reboot to ensure determinism in provider-selection.
  • Loading branch information
harsha-rajendran authored Mar 18, 2022
1 parent f68cca0 commit 57eddeb
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 25 deletions.
14 changes: 10 additions & 4 deletions scripts/tests/ota_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,16 @@ timeout 30 grep -q "OTA image downloaded to" <(tail -n0 -f /tmp/ota/requestor-lo

echo "Exiting, logs are in tmp/ota/"

killall -e "$OTA_PROVIDER_APP" "$OTA_REQUESTOR_APP"

if cmp "$OTA_DOWNLOAD_PATH" "$FIRMWARE_BIN"; then
echo Test passed && exit 0
TEST_RESULT="Test passed"
RETURN_VALUE=0
else
echo Test failed && exit 1
TEST_RESULT="Test failed"
RETURN_VALUE=1
fi

killall -e "$OTA_PROVIDER_APP" "$OTA_REQUESTOR_APP"
rm -f "$FIRMWARE_OTA" "$FIRMWARE_BIN" "$OTA_DOWNLOAD_PATH"

echo "$TEST_RESULT"
exit "$RETURN_VALUE"
61 changes: 45 additions & 16 deletions src/app/clusters/ota-requestor/GenericOTARequestorDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
//
// This particular implementation of the OTARequestorDriver makes the following choices:
// - Only a single timer can be active at any given moment
// - The default provider timer is running if and only if there is no update in progress (the OTARequestor
// - The periodic query timer is running if and only if there is no update in progress (the OTARequestor
// UpdateState is kIdle)
// - AnnounceOTAProviders command is ignored if an update is in progress
// - The provider location passed in AnnounceOTAProviders is used in a single query (possibly retried) and then discarded
// - Explicitly triggering a query through TriggerImmediateQuery() cancels any in-progress update
// - A QueryImage call results in the driver iterating through the list of default OTA providers, from beginning to the end, until a
// provider successfully transfers the OTA image. If a provider is busy, it will be retried a set number of times before moving
// to the next available one. If all else fails, the periodic query timer is kicked off again.

#include <platform/CHIPDeviceLayer.h>
#include <platform/OTAImageProcessor.h>
Expand Down Expand Up @@ -55,8 +58,9 @@ GenericOTARequestorDriver * ToDriver(void * context)

void GenericOTARequestorDriver::Init(OTARequestorInterface * requestor, OTAImageProcessorInterface * processor)
{
mRequestor = requestor;
mImageProcessor = processor;
mRequestor = requestor;
mImageProcessor = processor;
mProviderRetryCount = 0;

if (mImageProcessor->IsFirstImageRun())
{
Expand Down Expand Up @@ -117,7 +121,7 @@ void GenericOTARequestorDriver::HandleIdleState(IdleStateReason reason)
break;
case IdleStateReason::kInvalidSession:
// An invalid session is detected which may be temporary so try to query the same provider again
DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); });
SendQueryImage();
break;
}
}
Expand Down Expand Up @@ -146,14 +150,21 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst
break;
case UpdateNotFoundReason::kBusy:
willTryAnotherQuery = true;
break;
if (mProviderRetryCount <= kMaxBusyProviderRetryCount)
{
break;
}
ChipLogProgress(SoftwareUpdate, "Max Busy Provider retries reached. Attempting to get next Provider.");
__attribute__((fallthrough)); // fallthrough
case UpdateNotFoundReason::kNotAvailable: {
// IMPLEMENTATION CHOICE:
// This implementation schedules a query only if a different provider is available
Optional<ProviderLocationType> lastUsedProvider;
mRequestor->GetProviderLocation(lastUsedProvider);
if ((GetNextProviderLocation(providerLocation) != true) ||
(lastUsedProvider.HasValue() && ProviderLocationsEqual(providerLocation, lastUsedProvider.Value())))
// Note that the "listExhausted" being set to TRUE, implies that the entire list of
// defaultOTAProviders has been traversed. On bootup, the last provider is reset
// which ensures that every QueryImage call will ensure that the list is traversed from
// start to end, until an OTA is successfully completed.
bool listExhausted = false;
if ((GetNextProviderLocation(providerLocation, listExhausted) != true) || (listExhausted == true))
{
willTryAnotherQuery = false;
}
Expand All @@ -164,9 +175,6 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst
}
break;
}
default:
willTryAnotherQuery = false;
break;
}

if (delay < kDefaultDelayedActionTime)
Expand All @@ -182,6 +190,7 @@ void GenericOTARequestorDriver::UpdateNotFound(UpdateNotFoundReason reason, Syst
else
{
ChipLogProgress(SoftwareUpdate, "UpdateNotFound, not scheduling further retries");
StartDefaultProviderTimer();
}
}

Expand Down Expand Up @@ -295,7 +304,22 @@ void GenericOTARequestorDriver::ProcessAnnounceOTAProviders(

void GenericOTARequestorDriver::SendQueryImage()
{

Optional<ProviderLocationType> lastUsedProvider;
mRequestor->GetProviderLocation(lastUsedProvider);
if (!lastUsedProvider.HasValue())
{
ProviderLocationType providerLocation;
bool listExhausted = false;
if (GetNextProviderLocation(providerLocation, listExhausted) == true)
{
mRequestor->SetCurrentProviderLocation(providerLocation);
}
else
{
ChipLogProgress(SoftwareUpdate, "No provider available");
return;
}
}
// IMPLEMENTATION CHOICE
// In this implementation explicitly triggering a query cancels any in-progress update.
UpdateCancelled();
Expand All @@ -304,6 +328,8 @@ void GenericOTARequestorDriver::SendQueryImage()
// TriggerImmediateQueryInternal() will cause the state to change from kIdle
StopDefaultProviderTimer();

mProviderRetryCount++;

DeviceLayer::SystemLayer().ScheduleLambda([this] { mRequestor->TriggerImmediateQueryInternal(); });
}

Expand All @@ -313,7 +339,8 @@ void GenericOTARequestorDriver::DefaultProviderTimerHandler(System::Layer * syst

// Determine which provider to query next
ProviderLocationType providerLocation;
if (GetNextProviderLocation(providerLocation) != true)
bool listExhausted = false;
if (GetNextProviderLocation(providerLocation, listExhausted) != true)
{
StartDefaultProviderTimer();
return;
Expand All @@ -328,7 +355,6 @@ void GenericOTARequestorDriver::StartDefaultProviderTimer()
{
ChipLogProgress(SoftwareUpdate, "Starting the Default Provider timer, timeout: %u seconds",
(unsigned int) mPeriodicQueryTimeInterval);

ScheduleDelayedAction(
System::Clock::Seconds32(mPeriodicQueryTimeInterval),
[](System::Layer *, void * context) {
Expand All @@ -352,10 +378,12 @@ void GenericOTARequestorDriver::StopDefaultProviderTimer()
* circular list and return the next value (based on the last used provider). If the list of DefaultOtaProviders is empty, FALSE is
* returned.
*/
bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & providerLocation)
bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted)
{
Optional<ProviderLocationType> lastUsedProvider;
mRequestor->GetProviderLocation(lastUsedProvider);
mProviderRetryCount = 0; // Reset provider retry count
listExhausted = false;

// Iterate through the default providers list and find the last used provider. If found, return the provider after it
auto iterator = mRequestor->GetDefaultOTAProviderListIterator();
Expand All @@ -376,6 +404,7 @@ bool GenericOTARequestorDriver::GetNextProviderLocation(ProviderLocationType & p
if (iterator.Next())
{
providerLocation = iterator.GetValue();
listExhausted = true;
return true;
}
else
Expand Down
5 changes: 4 additions & 1 deletion src/app/clusters/ota-requestor/GenericOTARequestorDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class GenericOTARequestorDriver : public OTARequestorDriver
void ProcessAnnounceOTAProviders(const ProviderLocationType & providerLocation,
app::Clusters::OtaSoftwareUpdateRequestor::OTAAnnouncementReason announcementReason) override;
void SendQueryImage() override;
bool GetNextProviderLocation(ProviderLocationType & providerLocation) override;
bool GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted) override;

protected:
void StartDefaultProviderTimer();
Expand All @@ -82,6 +82,9 @@ class GenericOTARequestorDriver : public OTARequestorDriver
OTAImageProcessorInterface * mImageProcessor = nullptr;
uint32_t mOtaStartDelaySec = 0;
uint32_t mPeriodicQueryTimeInterval = (24 * 60 * 60); // Timeout for querying providers on the default OTA provider list
// Maximum number of times to retry a BUSY OTA provider before moving to the next available one
static constexpr uint8_t kMaxBusyProviderRetryCount = 3;
uint8_t mProviderRetryCount; // Track retry count for the current provider
};

} // namespace DeviceLayer
Expand Down
5 changes: 4 additions & 1 deletion src/app/clusters/ota-requestor/OTARequestor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ void OTARequestor::TriggerImmediateQueryInternal()
OTARequestorInterface::OTATriggerResult OTARequestor::TriggerImmediateQuery()
{
ProviderLocationType providerLocation;
if (mOtaRequestorDriver->GetNextProviderLocation(providerLocation) != true)
bool listExhausted = false;
if (mOtaRequestorDriver->GetNextProviderLocation(providerLocation, listExhausted) != true)
{
ChipLogError(SoftwareUpdate, "No OTA Providers available");
return kNoProviderKnown;
Expand Down Expand Up @@ -787,6 +788,8 @@ CHIP_ERROR OTARequestor::SendNotifyUpdateAppliedRequest(OperationalDeviceProxy &
Controller::OtaSoftwareUpdateProviderCluster cluster;
cluster.Associate(&deviceProxy, mProviderLocation.Value().endpoint);

mProviderLocation.ClearValue(); // Clearing the last used provider location to start afresh on reboot

return cluster.InvokeCommand(args, this, OnNotifyUpdateAppliedResponse, OnNotifyUpdateAppliedFailure);
}

Expand Down
6 changes: 4 additions & 2 deletions src/app/clusters/ota-requestor/OTARequestor.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ class OTARequestor : public OTARequestorInterface, public BDXDownloader::StateDe
mUpdateToken = updateToken;
}


// Schedule the initializations that needs to be performed in the CHIP context
DeviceLayer::PlatformMgr().ScheduleWork(InitState, reinterpret_cast<intptr_t>(this));

Expand Down Expand Up @@ -333,7 +332,10 @@ class OTARequestor : public OTARequestorInterface, public BDXDownloader::StateDe
OTAUpdateStateEnum mCurrentUpdateState = OTAUpdateStateEnum::kUnknown;
Server * mServer = nullptr;
ProviderLocationList mDefaultOtaProviderList;
Optional<ProviderLocationType> mProviderLocation; // Provider location used for the current/last update in progress
// Provider location used for the current/last update in progress. Note that on reboot, this value will be read from the
// persistent storage (if available), used for sending the NotifyApplied message, and then cleared. This will ensure determinism
// in the OTARequestorDriver on reboot.
Optional<ProviderLocationType> mProviderLocation;
};

} // namespace chip
4 changes: 3 additions & 1 deletion src/app/clusters/ota-requestor/OTARequestorDriver.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ class OTARequestorDriver
// Driver picks the OTA Provider that should be used for the next query and update. The Provider is picked according to
// the driver's internal logic such as, for example, traversing the default providers list.
// Returns true if there is a Provider available for the next query, returns false otherwise.
virtual bool GetNextProviderLocation(ProviderLocationType & providerLocation) = 0;
// [in] listExhausted - set to TRUE if the list of providers has been traversed until the end and has looped
// back to the beginning.
virtual bool GetNextProviderLocation(ProviderLocationType & providerLocation, bool & listExhausted) = 0;
};

} // namespace chip

0 comments on commit 57eddeb

Please sign in to comment.