Skip to content

Commit

Permalink
update stability formula (#111)
Browse files Browse the repository at this point in the history
* Update formula to use max/min < 1.1

* Add comments and clean up code

* Update ignore file to include builddir

* Add unit testing to the stability function

* Update stability method names

* Move load parameter setting inside class

* Refactor check window for stability method

* Update method comments and add a return value to reached_stability method

* Add testing for within threshold check

* Rename reached_stability method and updated comments

* Add more testing for finished profiling method

* Clean up comments from dead code

* Update documentation

* Refactor finished profiling into 2 separate methods

* Update help message

* Reformat to use camel case

* Capitalize method names

* Formatted

* Add an underflow test and check

Co-authored-by: Elias Bermudez <dbermudez@nvidia.com>
Co-authored-by: Matthew Kotila <matthew.r.kotila@gmail.com>
  • Loading branch information
3 people authored and mc-nv committed Jun 13, 2022
1 parent 2dcf8c3 commit 217a234
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
/.vscode
*.so
node_modules
src/c++/perf_analyzer/builddir/
src/c++/perf_analyzer/.vscode/
343 changes: 292 additions & 51 deletions src/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -638,57 +638,12 @@ InferenceProfiler::ProfileHelper(
}
}

if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) {
size_t idx =
load_status.infer_per_sec.size() - load_parameters_.stability_window;
if (load_status.infer_per_sec.size() >
load_parameters_.stability_window) {
load_status.avg_ips -= load_status.infer_per_sec[idx - 1] /
load_parameters_.stability_window;
load_status.avg_latency -=
load_status.latencies[idx - 1] / load_parameters_.stability_window;
}
*is_stable = true;
bool within_threshold = false;
for (; idx < load_status.infer_per_sec.size(); idx++) {
if (load_status.infer_per_sec[idx] == 0) {
*is_stable = false;
}
if ((load_status.latencies[idx] <
(latency_threshold_ms_ * 1000 * 1000))) {
within_threshold = true;
}
*is_stable = DetermineStability(load_status);

// We call it complete only if stability_window measurements are within
// +/-(stability_threshold)% of the average infer per second and latency
if ((load_status.infer_per_sec[idx] <
load_status.avg_ips *
(1 - load_parameters_.stability_threshold)) ||
(load_status.infer_per_sec[idx] >
load_status.avg_ips *
(1 + load_parameters_.stability_threshold))) {
*is_stable = false;
}
if ((load_status.latencies[idx] <
load_status.avg_latency *
(1 - load_parameters_.stability_threshold)) ||
(load_status.latencies[idx] >
load_status.avg_latency *
(1 + load_parameters_.stability_threshold))) {
*is_stable = false;
}
}
if (mpi_driver_->IsMPIRun()) {
if (AllMPIRanksAreStable(*is_stable)) {
break;
}
} else if (*is_stable) {
break;
}
if ((!within_threshold) && (latency_threshold_ms_ != NO_LIMIT)) {
break;
}
if (IsDoneProfiling(load_status, is_stable)) {
break;
}

completed_trials++;
} while ((!early_exit) && (completed_trials < max_trials_));

Expand All @@ -710,6 +665,93 @@ InferenceProfiler::ProfileHelper(
return cb::Error::Success;
}

bool
InferenceProfiler::DetermineStability(LoadStatus& load_status)
{
bool stable = false;
if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) {
stable = true;
size_t idx =
load_status.infer_per_sec.size() - load_parameters_.stability_window;

for (size_t i = idx; i < load_status.infer_per_sec.size(); i++) {
if (load_status.infer_per_sec[i] == 0) {
stable = false;
}
}

stable = stable && CheckWindowForStability(idx, load_status);
}
return stable;
}

bool
InferenceProfiler::CheckWindowForStability(size_t idx, LoadStatus& load_status)
{
return IsInferWindowStable(idx, load_status) &&
IsLatencyWindowStable(idx, load_status);
}

bool
InferenceProfiler::IsInferWindowStable(size_t idx, LoadStatus& load_status)
{
auto infer_start = std::begin(load_status.infer_per_sec) + idx;
auto infer_per_sec_measurements = std::minmax_element(
infer_start, infer_start + load_parameters_.stability_window);

auto max_infer_per_sec = *infer_per_sec_measurements.second;
auto min_infer_per_sec = *infer_per_sec_measurements.first;

return max_infer_per_sec / min_infer_per_sec <=
1 + load_parameters_.stability_threshold;
}

bool
InferenceProfiler::IsLatencyWindowStable(size_t idx, LoadStatus& load_status)
{
auto latency_start = std::begin(load_status.latencies) + idx;
auto latencies_per_sec_measurements = std::minmax_element(
latency_start, latency_start + load_parameters_.stability_window);

auto max_latency = *latencies_per_sec_measurements.second;
auto min_latency = *latencies_per_sec_measurements.first;

return max_latency / min_latency <= 1 + load_parameters_.stability_threshold;
}

bool
InferenceProfiler::IsDoneProfiling(LoadStatus& load_status, bool* is_stable)
{
bool done = false;
bool within_threshold = true;
if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) {
size_t idx =
load_status.infer_per_sec.size() - load_parameters_.stability_window;

for (; idx < load_status.infer_per_sec.size(); idx++) {
within_threshold &= CheckWithinThreshold(idx, load_status);
}
}

if (mpi_driver_->IsMPIRun()) {
if (AllMPIRanksAreStable(*is_stable)) {
done = true;
}
} else if (*is_stable) {
done = true;
}
if ((!within_threshold) && (latency_threshold_ms_ != NO_LIMIT)) {
done = true;
}
return done;
}

bool
InferenceProfiler::CheckWithinThreshold(size_t idx, LoadStatus& load_status)
{
return load_status.latencies[idx] < (latency_threshold_ms_ * 1000 * 1000);
}

cb::Error
InferenceProfiler::MergeServerSideStats(
std::vector<ServerSideStats>& server_side_stats,
Expand Down Expand Up @@ -1172,8 +1214,8 @@ InferenceProfiler::SummarizeServerStatsHelper(
const std::map<cb::ModelIdentifier, cb::ModelStatistics>& end_status,
ServerSideStats* server_stats)
{
// If model_version is an empty string then look in the end status to find the
// latest (highest valued version) and use that as the version.
// If model_version is an empty string then look in the end status to find
// the latest (highest valued version) and use that as the version.
int64_t status_model_version = -1;
if (model_identifier.second.empty()) {
for (const auto& id : end_status) {
Expand Down Expand Up @@ -1346,6 +1388,50 @@ class TestInferenceProfiler {
inference_profiler.ValidLatencyMeasurement(
valid_range, valid_sequence_count, delayed_request_count, latencies);
}


static bool TestCheckWithinThreshold(
LoadStatus& ls, LoadParams& lp, uint64_t latency_threshold_ms)
{
InferenceProfiler ip;
size_t idx = ls.infer_per_sec.size() - lp.stability_window;
ip.latency_threshold_ms_ = latency_threshold_ms;

return ip.CheckWithinThreshold(idx, ls);
}

static bool TestCheckWindowForStability(LoadStatus& ls, LoadParams& lp)
{
size_t idx = ls.infer_per_sec.size() - lp.stability_window;

InferenceProfiler ip;
ip.load_parameters_.stability_threshold = lp.stability_threshold;
ip.load_parameters_.stability_window = lp.stability_window;

return ip.CheckWindowForStability(idx, ls);
};

static bool TestDetermineStability(LoadStatus& ls, LoadParams& lp)
{
InferenceProfiler ip;
ip.load_parameters_.stability_threshold = lp.stability_threshold;
ip.load_parameters_.stability_window = lp.stability_window;

return ip.DetermineStability(ls);
}

static bool TestIsDoneProfiling(
LoadStatus& ls, LoadParams& lp, uint64_t latency_threshold_ms)
{
InferenceProfiler ip;
ip.load_parameters_.stability_threshold = lp.stability_threshold;
ip.load_parameters_.stability_window = lp.stability_window;
ip.latency_threshold_ms_ = latency_threshold_ms;
ip.mpi_driver_ = std::make_shared<triton::perfanalyzer::MPIDriver>(false);

bool is_stable = ip.DetermineStability(ls);
return ip.IsDoneProfiling(ls, &is_stable);
};
};

TEST_CASE("testing the ValidLatencyMeasurement function")
Expand Down Expand Up @@ -1397,5 +1483,160 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
CHECK(latencies[2] == convert_timestamp_to_latency(all_timestamps[3]));
}

TEST_CASE("test_check_window_for_stability")
{
LoadStatus ls;
LoadParams lp;

SUBCASE("test throughput not stable")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == false);
}
SUBCASE("test throughput stable")
{
ls.infer_per_sec = {500.0, 520.0, 510.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true);
}
SUBCASE("test latency not stable")
{
ls.infer_per_sec = {500.0, 520.0, 510.0};
ls.latencies = {1, 100, 50};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == false);
}
SUBCASE("test latency stable")
{
ls.infer_per_sec = {500.0, 520.0, 510.0};
ls.latencies = {45, 50, 45};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true);
}
SUBCASE("test throughput stable after many measurements")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0, 1500.0, 500.0, 520.0, 510.0};
ls.latencies = {1, 1, 1, 1, 1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
CHECK(TestInferenceProfiler::TestCheckWindowForStability(ls, lp) == true);
}
}

TEST_CASE("test check within threshold")
{
LoadStatus ls;
LoadParams lp;

ls.infer_per_sec = {500.0, 520.0, 510.0};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;

SUBCASE("test not within threshold")
{
ls.latencies = {2000000, 2000000, 2000000};
CHECK(
TestInferenceProfiler::TestCheckWithinThreshold(
ls, lp, latency_threshold_ms) == false);
}

SUBCASE("test within threshold")
{
ls.latencies = {100000, 100000, 100000};
CHECK(
TestInferenceProfiler::TestCheckWithinThreshold(
ls, lp, latency_threshold_ms) == true);
}
}

TEST_CASE("test_determine_stability")
{
LoadStatus ls;
LoadParams lp;

SUBCASE("test inference equals zero")
{
ls.infer_per_sec = {500.0, 0.0, 510.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;
CHECK(TestInferenceProfiler::TestDetermineStability(ls, lp) == false);

ls.infer_per_sec = {500.0, 520.0, 510.0};
CHECK(TestInferenceProfiler::TestDetermineStability(ls, lp) == true);
}
}

TEST_CASE("test_is_done_profiling")
{
LoadStatus ls;
LoadParams lp;


SUBCASE("test latency_threshold is NO_LIMIT")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = NO_LIMIT;

CHECK(
TestInferenceProfiler::TestIsDoneProfiling(
ls, lp, latency_threshold_ms) == false);
}

SUBCASE("test not within threshold from done profiling")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {2000000, 2000000, 2000000};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;
CHECK(
TestInferenceProfiler::TestIsDoneProfiling(
ls, lp, latency_threshold_ms) == true);
}

SUBCASE("test stability from is done profiling")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;

CHECK(
TestInferenceProfiler::TestIsDoneProfiling(
ls, lp, latency_threshold_ms) == false);
ls.infer_per_sec = {500.0, 520.0, 510.0};

CHECK(
TestInferenceProfiler::TestIsDoneProfiling(
ls, lp, latency_threshold_ms) == true);
}

SUBCASE("test underflow")
{
ls.infer_per_sec = {500.0, 510.0};
ls.latencies = {1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;
CHECK(
TestInferenceProfiler::TestIsDoneProfiling(
ls, lp, latency_threshold_ms) == false);
}
}

#endif
}} // namespace triton::perfanalyzer
Loading

0 comments on commit 217a234

Please sign in to comment.