Skip to content

Commit

Permalink
Refactor finished profiling into 2 separate methods
Browse files Browse the repository at this point in the history
  • Loading branch information
debermudez committed Jun 8, 2022
1 parent 7d08f36 commit d22bb07
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 46 deletions.
133 changes: 91 additions & 42 deletions src/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,9 @@ InferenceProfiler::ProfileHelper(
}
}

if (finished_profiling(load_status, is_stable)) {
*is_stable = determine_stability(load_status);

if (is_done_profiling(load_status, is_stable)) {
break;
}

Expand All @@ -654,43 +656,23 @@ InferenceProfiler::ProfileHelper(
}

bool
InferenceProfiler::finished_profiling(LoadStatus& load_status, bool* is_stable)
InferenceProfiler::determine_stability(LoadStatus& load_status)
{
bool done = false;
bool stable = false;
if (load_status.infer_per_sec.size() >= load_parameters_.stability_window) {
stable = true;
size_t idx =
load_status.infer_per_sec.size() - load_parameters_.stability_window;
*is_stable = true;
bool within_threshold = false;

for (size_t i = idx; i < load_status.infer_per_sec.size(); i++) {
if (load_status.infer_per_sec[i] == 0) {
*is_stable = false;
return done;
stable = false;
}

within_threshold = check_within_threshold(idx, load_status);
}

*is_stable = *is_stable && check_window_for_stability(idx, load_status);
if (mpi_driver_->IsMPIRun()) {
if (AllMPIRanksAreStable(*is_stable)) {
done = true;
}
} else if (*is_stable) {
done = true;
}
if ((!within_threshold) && (latency_threshold_ms_ != NO_LIMIT)) {
done = true;
}
stable = stable && check_window_for_stability(idx, load_status);
}
return done;
}

bool
InferenceProfiler::check_within_threshold(size_t idx, LoadStatus& load_status)
{
return load_status.latencies[idx] < (latency_threshold_ms_ * 1000 * 1000);
return stable;
}

bool
Expand Down Expand Up @@ -728,6 +710,37 @@ InferenceProfiler::is_latency_window_stable(size_t idx, LoadStatus& load_status)
return max_latency / min_latency <= 1 + load_parameters_.stability_threshold;
}

/// Decide whether the profiling loop should stop.
///
/// Profiling is done when the measurement is stable (as agreed by
/// AllMPIRanksAreStable() for an MPI run), or when a latency limit is set
/// (latency_threshold_ms_ != NO_LIMIT) and at least one latency in the most
/// recent stability window is not within that limit.
///
/// \param load_status Stores the observations of infer_per_sec and latencies
/// \param is_stable Whether the measurement stabilized or not. Only read here.
/// \return Returns if we should break out of the infinite stability check
/// loop.
bool
InferenceProfiler::is_done_profiling(LoadStatus& load_status, bool* is_stable)
{
  const size_t num_measurements = load_status.infer_per_sec.size();
  const size_t window = load_parameters_.stability_window;
  bool within_threshold = true;

  // Only inspect the window once enough measurements exist, instead of
  // relying on unsigned wrap-around of the start index to skip the loop.
  // NOTE(review): assumes load_status.latencies has one entry per
  // infer_per_sec entry -- confirm against the code that fills LoadStatus.
  if (num_measurements >= window) {
    for (size_t i = num_measurements - window; i < num_measurements; i++) {
      // Check every measurement in the window (index i), not just the first
      // one of the window on each iteration.
      within_threshold &= check_within_threshold(i, load_status);
    }
  }

  bool done = false;
  if (mpi_driver_->IsMPIRun()) {
    if (AllMPIRanksAreStable(*is_stable)) {
      done = true;
    }
  } else if (*is_stable) {
    done = true;
  }
  // Exceeding a configured latency limit ends profiling even when unstable.
  if ((!within_threshold) && (latency_threshold_ms_ != NO_LIMIT)) {
    done = true;
  }
  return done;
}

/// Report whether the latency stored at position idx is strictly below the
/// configured limit, i.e. latency_threshold_ms_ scaled by 1000 * 1000.
bool
InferenceProfiler::check_within_threshold(size_t idx, LoadStatus& load_status)
{
  const auto threshold = latency_threshold_ms_ * 1000 * 1000;
  const auto& observed = load_status.latencies[idx];
  return observed < threshold;
}

cb::Error
InferenceProfiler::GetServerSideStatus(
std::map<cb::ModelIdentifier, cb::ModelStatistics>* model_stats)
Expand Down Expand Up @@ -1190,7 +1203,16 @@ class TestInferenceProfiler {
return ip.check_window_for_stability(idx, ls);
};

static bool test_finished_profiling(
// Test hook: builds a default InferenceProfiler, copies the stability
// settings from lp into it, and returns what determine_stability() reports
// for the observations in ls.
static bool test_determine_stability(LoadStatus& ls, LoadParams& lp)
{
  InferenceProfiler profiler;
  profiler.load_parameters_.stability_window = lp.stability_window;
  profiler.load_parameters_.stability_threshold = lp.stability_threshold;

  return profiler.determine_stability(ls);
}

static bool test_is_done_profiling(
LoadStatus& ls, LoadParams& lp, uint64_t latency_threshold_ms)
{
InferenceProfiler ip;
Expand All @@ -1199,9 +1221,8 @@ class TestInferenceProfiler {
ip.latency_threshold_ms_ = latency_threshold_ms;
ip.mpi_driver_ = std::make_shared<triton::perfanalyzer::MPIDriver>(false);

bool is_stable = false;
ip.finished_profiling(ls, &is_stable);
return is_stable;
bool is_stable = ip.determine_stability(ls);
return ip.is_done_profiling(ls, &is_stable);
};
};

Expand Down Expand Up @@ -1286,7 +1307,7 @@ TEST_CASE("test check within threshold")
}
}

TEST_CASE("test_finished_profiling")
TEST_CASE("test_determine_stability")
{
LoadStatus ls;
LoadParams lp;
Expand All @@ -1298,14 +1319,30 @@ TEST_CASE("test_finished_profiling")
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = 1;
CHECK(
TestInferenceProfiler::test_finished_profiling(
ls, lp, latency_threshold_ms) == false);
CHECK(TestInferenceProfiler::test_determine_stability(ls, lp) == false);

ls.infer_per_sec = {500.0, 520.0, 510.0};
CHECK(TestInferenceProfiler::test_determine_stability(ls, lp) == true);
}
}

TEST_CASE("test_is_done_profiling")
{
LoadStatus ls;
LoadParams lp;


SUBCASE("test latency_threshold is NO_LIMIT")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {1, 1, 1};
lp.stability_window = 3;
lp.stability_threshold = 0.1;
uint64_t latency_threshold_ms = NO_LIMIT;

CHECK(
TestInferenceProfiler::test_finished_profiling(
ls, lp, latency_threshold_ms) == true);
TestInferenceProfiler::test_is_done_profiling(
ls, lp, latency_threshold_ms) == false);
}

SUBCASE("test latency_threshold is NO_LIMIT")
Expand All @@ -1317,11 +1354,24 @@ TEST_CASE("test_finished_profiling")
uint64_t latency_threshold_ms = NO_LIMIT;

CHECK(
TestInferenceProfiler::test_finished_profiling(
TestInferenceProfiler::test_is_done_profiling(
ls, lp, latency_threshold_ms) == false);
}

SUBCASE("test stability from finished profiling")

SUBCASE("test not within threshold from done profiling")
{
  // Each latency (2000000) is above the limit of 1 ms once that limit is
  // scaled by 1000 * 1000, so with a latency limit set the profiler is
  // expected to report done whatever the stability check says.
  const uint64_t latency_threshold_ms = 1;
  lp.stability_threshold = 0.1;
  lp.stability_window = 3;
  ls.latencies = {2000000, 2000000, 2000000};
  ls.infer_per_sec = {1.0, 1000.0, 500.0};
  CHECK(
      TestInferenceProfiler::test_is_done_profiling(
          ls, lp, latency_threshold_ms) == true);
}

SUBCASE("test stability from is done profiling")
{
ls.infer_per_sec = {1.0, 1000.0, 500.0};
ls.latencies = {1, 1, 1};
Expand All @@ -1330,16 +1380,15 @@ TEST_CASE("test_finished_profiling")
uint64_t latency_threshold_ms = 1;

CHECK(
TestInferenceProfiler::test_finished_profiling(
TestInferenceProfiler::test_is_done_profiling(
ls, lp, latency_threshold_ms) == false);
ls.infer_per_sec = {500.0, 520.0, 510.0};

CHECK(
TestInferenceProfiler::test_finished_profiling(
TestInferenceProfiler::test_is_done_profiling(
ls, lp, latency_threshold_ms) == true);
}
}

#endif

}} // namespace triton::perfanalyzer
13 changes: 9 additions & 4 deletions src/c++/perf_analyzer/inference_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,17 +326,22 @@ class InferenceProfiler {

/// A helper function to determine if profiling is stable
/// \param load_status Stores the observations of infer_per_sec and latencies
/// \param is_stable Returns whether the measurement stabilized or not.
/// \return Returns if the threshold and latencies are stable. Used to
/// determine if we should break out of the infinite stability check loop.
bool finished_profiling(LoadStatus& load_status, bool* is_stable);
/// \return Returns if the threshold and latencies are stable.
bool determine_stability(LoadStatus& load_status);

/// Check if the latency at index idx is within the latency threshold.
/// The comparison is strict: the latency must be less than
/// latency_threshold_ms_ * 1000 * 1000.
/// \param idx Index into load_status.latencies
/// \param load_status Stores the observations of infer_per_sec and latencies
/// \return Returns true if the latency at idx is below the max threshold
bool check_within_threshold(size_t idx, LoadStatus& load_status);

/// A helper function to determine if profiling is done
/// \param load_status Stores the observations of infer_per_sec and latencies
/// \param is_stable Whether the measurement stabilized or not. This is an
/// input: is_done_profiling itself only reads the pointed-to value and does
/// not assign to it.
/// \return Returns if we should break out of the infinite stability check
/// loop.
bool is_done_profiling(LoadStatus& load_status, bool* is_stable);

/// Check if observed inferences and latencies are within threshold
/// for a single window starting at idx
/// \param idx index in latency vector
Expand Down

0 comments on commit d22bb07

Please sign in to comment.