Exit with error on failed stabilization (#114)
* Add failed stabilization error exit

* Addressed comments
matthewkotila authored and mc-nv committed Jun 13, 2022
1 parent 8709fbc commit 2dcf8c3
Showing 5 changed files with 83 additions and 54 deletions.
15 changes: 12 additions & 3 deletions src/c++/perf_analyzer/client_backend/client_backend.cc
@@ -44,15 +44,24 @@ namespace triton { namespace perfanalyzer { namespace clientbackend {

//================================================

const Error Error::Success("");
const Error Error::Success;
const Error Error::Failure("");

Error::Error(const std::string& msg) : msg_(msg) {}
Error::Error()
{
has_error_ = false;
}

Error::Error(const std::string& msg) : msg_(msg)
{
has_error_ = true;
}

std::ostream&
operator<<(std::ostream& out, const Error& err)
{
if (!err.msg_.empty()) {
out << err.msg_;
out << err.msg_ << std::endl;
}
return out;
}
12 changes: 10 additions & 2 deletions src/c++/perf_analyzer/client_backend/client_backend.h
@@ -69,9 +69,12 @@ namespace triton { namespace perfanalyzer { namespace clientbackend {
///
class Error {
public:
/// Create an error
explicit Error();

/// Create an error with the specified message.
/// \param msg The message for the error
explicit Error(const std::string& msg = "");
explicit Error(const std::string& msg);

/// Accessor for the message of this error.
/// \return The message for the error. Empty if no error.
@@ -80,15 +83,20 @@ class Error {
/// Does this error indicate OK status?
/// \return True if this error indicates "ok"/"success", false if
/// error indicates a failure.
bool IsOk() const { return msg_.empty(); }
bool IsOk() const { return !has_error_; }

/// Convenience "success" value. Can be used as Error::Success to
/// indicate no error.
static const Error Success;

/// Convenience "failure" value. Can be used as Error::Failure to
/// indicate a generic error.
static const Error Failure;

private:
friend std::ostream& operator<<(std::ostream&, const Error&);
std::string msg_;
bool has_error_;
};

//===================================================================================
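
A quick note on what the Error rework above buys: success or failure is now tracked by an explicit has_error_ flag instead of by whether the message is empty, so the new Error::Failure constant, even though it carries an empty message, still reports a failure. The stand-alone sketch below is illustrative only and not part of the diff; it condenses the class to the members involved in this change.

#include <iostream>
#include <string>

// Condensed, illustrative mirror of the reworked Error type from
// client_backend.h (not the actual header). Success/failure is carried by
// has_error_, so an empty message no longer implies "ok".
class Error {
 public:
  Error() : has_error_(false) {}
  explicit Error(const std::string& msg) : msg_(msg), has_error_(true) {}

  bool IsOk() const { return !has_error_; }
  const std::string& Message() const { return msg_; }

  static const Error Success;  // default-constructed: IsOk() == true
  static const Error Failure;  // empty message, yet IsOk() == false

 private:
  std::string msg_;
  bool has_error_;
};

const Error Error::Success;
const Error Error::Failure("");

int main()
{
  std::cout << Error::Success.IsOk() << "\n";  // prints 1
  std::cout << Error::Failure.IsOk() << "\n";  // prints 0, impossible with the old msg_.empty() check
  return 0;
}
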
81 changes: 42 additions & 39 deletions src/c++/perf_analyzer/inference_profiler.cc
@@ -427,36 +427,29 @@ InferenceProfiler::InferenceProfiler(
cb::Error
InferenceProfiler::Profile(
const size_t concurrent_request_count, std::vector<PerfStatus>& summary,
bool* meets_threshold)
bool& meets_threshold, bool& is_stable)
{
cb::Error err;
PerfStatus status_summary;

status_summary.concurrency = concurrent_request_count;

bool is_stable = false;
*meets_threshold = true;
is_stable = false;
meets_threshold = true;

RETURN_IF_ERROR(dynamic_cast<ConcurrencyManager*>(manager_.get())
->ChangeConcurrencyLevel(concurrent_request_count));

err = ProfileHelper(false /* clean_starts */, status_summary, &is_stable);
if (err.IsOk()) {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
summary.push_back(status_summary);
uint64_t stabilizing_latency_ms =
status_summary.stabilizing_latency_ns / (1000 * 1000);
if (!err.IsOk()) {
std::cerr << err << std::endl;
*meets_threshold = false;
} else if (
(stabilizing_latency_ms >= latency_threshold_ms_) &&
if ((stabilizing_latency_ms >= latency_threshold_ms_) &&
(latency_threshold_ms_ != NO_LIMIT)) {
std::cerr << "Measured latency went over the set limit of "
<< latency_threshold_ms_ << " msec. " << std::endl;
*meets_threshold = false;
meets_threshold = false;
} else if (!is_stable) {
if (measurement_mode_ == MeasurementMode::TIME_WINDOWS) {
std::cerr << "Failed to obtain stable measurement within "
@@ -469,7 +462,15 @@ InferenceProfiler::Profile(
<< concurrent_request_count << ". Please try to "
<< "increase the --measurement-request-count." << std::endl;
}
*meets_threshold = false;
meets_threshold = false;
} else {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
if (!err.IsOk()) {
std::cerr << err;
meets_threshold = false;
}
}
} else {
return err;
@@ -481,39 +482,40 @@ InferenceProfiler::Profile(
cb::Error
InferenceProfiler::Profile(
const double request_rate, std::vector<PerfStatus>& summary,
bool* meets_threshold)
bool& meets_threshold, bool& is_stable)
{
cb::Error err;
PerfStatus status_summary;

status_summary.request_rate = request_rate;

bool is_stable = false;
*meets_threshold = true;
is_stable = false;
meets_threshold = true;

RETURN_IF_ERROR(dynamic_cast<RequestRateManager*>(manager_.get())
->ChangeRequestRate(request_rate));

err = ProfileHelper(false /*clean_starts*/, status_summary, &is_stable);
if (err.IsOk()) {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
summary.push_back(status_summary);
uint64_t stabilizing_latency_ms =
status_summary.stabilizing_latency_ns / (1000 * 1000);
if (!err.IsOk()) {
std::cerr << err << std::endl;
*meets_threshold = false;
} else if (
(stabilizing_latency_ms >= latency_threshold_ms_) &&
if ((stabilizing_latency_ms >= latency_threshold_ms_) &&
(latency_threshold_ms_ != NO_LIMIT)) {
std::cerr << "Measured latency went over the set limit of "
<< latency_threshold_ms_ << " msec. " << std::endl;
*meets_threshold = false;
meets_threshold = false;
} else if (!is_stable) {
std::cerr << "Failed to obtain stable measurement." << std::endl;
*meets_threshold = false;
meets_threshold = false;
} else {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
if (!err.IsOk()) {
std::cerr << err;
meets_threshold = false;
}
}
} else {
return err;
@@ -524,7 +526,7 @@ InferenceProfiler::Profile(

cb::Error
InferenceProfiler::Profile(
std::vector<PerfStatus>& summary, bool* meets_threshold)
std::vector<PerfStatus>& summary, bool& meets_threshold, bool& is_stable)
{
cb::Error err;
PerfStatus status_summary;
@@ -534,29 +536,30 @@ InferenceProfiler::Profile(
RETURN_IF_ERROR(dynamic_cast<CustomLoadManager*>(manager_.get())
->GetCustomRequestRate(&status_summary.request_rate));

bool is_stable = false;
*meets_threshold = true;
is_stable = false;
meets_threshold = true;

err = ProfileHelper(true /* clean_starts */, status_summary, &is_stable);
if (err.IsOk()) {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
summary.push_back(status_summary);
uint64_t stabilizing_latency_ms =
status_summary.stabilizing_latency_ns / (1000 * 1000);
if (!err.IsOk()) {
std::cerr << err << std::endl;
*meets_threshold = false;
} else if (
(stabilizing_latency_ms >= latency_threshold_ms_) &&
if ((stabilizing_latency_ms >= latency_threshold_ms_) &&
(latency_threshold_ms_ != NO_LIMIT)) {
std::cerr << "Measured latency went over the set limit of "
<< latency_threshold_ms_ << " msec. " << std::endl;
*meets_threshold = false;
meets_threshold = false;
} else if (!is_stable) {
std::cerr << "Failed to obtain stable measurement." << std::endl;
*meets_threshold = false;
meets_threshold = false;
} else {
err = Report(
status_summary, percentile_, protocol_, verbose_, include_lib_stats_,
include_server_stats_, parser_);
if (!err.IsOk()) {
std::cerr << err;
meets_threshold = false;
}
}
} else {
return err;
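
The three Profile() overloads above (concurrency, request rate, custom load) share the same reshaped flow, summarized in the sketch below. It is a simplified stand-alone skeleton, not the real code: RunMeasurement and PrintReport are stand-ins for ProfileHelper and Report, kNoLimit stands in for NO_LIMIT, and the latency values are passed in directly. It illustrates the two points of the change: meets_threshold and is_stable are now bool& out-parameters, and the report is only produced once the measurement is both under the latency limit and stable.

#include <cstdint>
#include <iostream>

// Stand-ins for the real helpers (assumptions for this sketch only).
static bool RunMeasurement(bool& is_stable) { is_stable = true; return true; }
static bool PrintReport() { return true; }

static constexpr uint64_t kNoLimit = 0;

// Simplified shape of InferenceProfiler::Profile() after this change.
static bool Profile(
    uint64_t stabilizing_latency_ms, uint64_t latency_threshold_ms,
    bool& meets_threshold, bool& is_stable)
{
  is_stable = false;
  meets_threshold = true;

  if (!RunMeasurement(is_stable)) {
    return false;  // hard error from the measurement itself: propagate
  }
  if ((stabilizing_latency_ms >= latency_threshold_ms) &&
      (latency_threshold_ms != kNoLimit)) {
    std::cerr << "Measured latency went over the set limit." << std::endl;
    meets_threshold = false;
  } else if (!is_stable) {
    std::cerr << "Failed to obtain stable measurement." << std::endl;
    meets_threshold = false;
  } else if (!PrintReport()) {
    meets_threshold = false;  // reporting failed: setting does not pass
  }
  return true;
}

int main()
{
  bool meets_threshold = false, is_stable = false;
  Profile(/*stabilizing_latency_ms=*/50, /*latency_threshold_ms=*/kNoLimit,
          meets_threshold, is_stable);
  std::cout << meets_threshold << " " << is_stable << "\n";  // prints "1 1"
  return 0;
}
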
27 changes: 18 additions & 9 deletions src/c++/perf_analyzer/inference_profiler.h
@@ -228,28 +228,33 @@ class InferenceProfiler {
std::vector<PerfStatus>& summary)
{
cb::Error err;
bool meets_threshold;
bool meets_threshold, is_stable;
if (search_mode == SearchMode::NONE) {
err = Profile(summary, &meets_threshold);
err = Profile(summary, meets_threshold, is_stable);
if (!err.IsOk()) {
return err;
}
} else if (search_mode == SearchMode::LINEAR) {
T current_value = start;
do {
err = Profile(current_value, summary, &meets_threshold);
err = Profile(current_value, summary, meets_threshold, is_stable);
if (!err.IsOk()) {
return err;
}
current_value += step;
} while (((current_value <= end) || (end == static_cast<T>(NO_LIMIT))) &&
(meets_threshold));
// If there was only one concurrency we swept over and it did not meet the
// stability threshold, we should return an error.
if (current_value == (start + step) && is_stable == false) {
return cb::Error::Failure;
}
} else {
err = Profile(start, summary, &meets_threshold);
err = Profile(start, summary, meets_threshold, is_stable);
if (!err.IsOk() || (!meets_threshold)) {
return err;
}
err = Profile(end, summary, &meets_threshold);
err = Profile(end, summary, meets_threshold, is_stable);
if (!err.IsOk() || (meets_threshold)) {
return err;
}
@@ -258,7 +263,7 @@ class InferenceProfiler {
T this_end = end;
while ((this_end - this_start) > step) {
T current_value = (this_end + this_start) / 2;
err = Profile(current_value, summary, &meets_threshold);
err = Profile(current_value, summary, meets_threshold, is_stable);
if (!err.IsOk()) {
return err;
}
@@ -296,29 +301,33 @@ class InferenceProfiler {
/// \param concurrent_request_count The concurrency level for the measurement.
/// \param summary Appends the measurements summary at the end of this list.
/// \param meets_threshold Returns whether the setting meets the threshold.
/// \param is_stable Returns whether the measurement is stable.
/// \return cb::Error object indicating success or failure.
cb::Error Profile(
const size_t concurrent_request_count, std::vector<PerfStatus>& summary,
bool* meets_threshold);
bool& meets_threshold, bool& is_stable);

/// Similar to above function, but instead of setting the concurrency, it
/// sets the specified request rate for measurements.
/// \param request_rate The request rate for inferences.
/// \param summary Appends the measurements summary at the end of this list.
/// \param meets_threshold Returns whether the setting meets the threshold.
/// \param is_stable Returns whether the measurement is stable.
/// \return cb::Error object indicating success or failure.
cb::Error Profile(
const double request_rate, std::vector<PerfStatus>& summary,
bool* meets_threshold);
bool& meets_threshold, bool& is_stable);

/// Measures throughput and latencies for custom load without controlling
/// request rate or concurrency. Requires load manager to be loaded with
/// a file specifying the time intervals.
/// \param summary Appends the measurements summary at the end of this list.
/// \param meets_threshold Returns whether the measurement met the threshold.
/// \param is_stable Returns whether the measurement is stable.
/// \return cb::Error object indicating success
/// or failure.
cb::Error Profile(std::vector<PerfStatus>& summary, bool* meets_threshold);
cb::Error Profile(
std::vector<PerfStatus>& summary, bool& meets_threshold, bool& is_stable);

/// A helper function for profiling functions.
/// \param clean_starts Whether or not to reset load cycle with every
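
The linear-search check added above is what ties the new is_stable flag to the commit title: if only one concurrency (or request rate) was swept and it never stabilized, the search template now returns cb::Error::Failure instead of succeeding silently, which is how perf_analyzer ends up exiting with an error. The compilable sketch below shows just that rule; Error, ProfileOnce, LinearSearch, and kNoLimit are simplified stand-ins for cb::Error, the Profile() overloads, the search template, and NO_LIMIT, not the real types.

#include <iostream>

// Minimal stand-in for cb::Error (illustration only).
struct Error {
  bool ok;
  static Error Success() { return {true}; }
  static Error Failure() { return {false}; }
};

// Stand-in for the Profile() overloads; pretends the setting never stabilizes.
static Error ProfileOnce(int /*value*/, bool& meets_threshold, bool& is_stable)
{
  meets_threshold = false;
  is_stable = false;
  return Error::Success();
}

static constexpr int kNoLimit = 0;

// Shape of the linear search in inference_profiler.h after this change.
static Error LinearSearch(int start, int step, int end)
{
  bool meets_threshold = true, is_stable = false;
  int current_value = start;
  do {
    Error err = ProfileOnce(current_value, meets_threshold, is_stable);
    if (!err.ok) {
      return err;
    }
    current_value += step;
  } while (((current_value <= end) || (end == kNoLimit)) && meets_threshold);

  // New in this commit: a single swept value that failed to stabilize is an
  // error, not a silent success.
  if (current_value == (start + step) && !is_stable) {
    return Error::Failure();
  }
  return Error::Success();
}

int main()
{
  std::cout << (LinearSearch(1, 1, 4).ok ? "ok" : "failure") << "\n";  // prints "failure"
  return 0;
}
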
2 changes: 1 addition & 1 deletion src/c++/perf_analyzer/perf_analyzer.cc
@@ -1831,7 +1831,7 @@ PerfAnalyzer::Run(int argc, char** argv)
mpi_driver->MPIBarrierWorld();

if (!err.IsOk()) {
std::cerr << err << std::endl;
std::cerr << err;
// In the case of early_exit, the thread does not return and continues to
// report the summary
if (!pa::early_exit) {
