Skip to content

Commit

Permalink
Fix merging for ensemble models
Browse files Browse the repository at this point in the history
  • Loading branch information
Tabrizian committed Jun 3, 2022
1 parent 634a579 commit 598cb3c
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 51 deletions.
145 changes: 94 additions & 51 deletions src/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,88 @@ InferenceProfiler::ProfileHelper(
return cb::Error::Success;
}

cb::Error
InferenceProfiler::MergeServerSideStats(
std::vector<ServerSideStats>& server_side_stats,
ServerSideStats& server_side_summary)
{
auto& server_side_stat = server_side_stats[0];

// Make sure that the perf status reports profiling settings match with each
// other.
for (size_t i = 1; i < server_side_stats.size(); i++) {
if (server_side_stats[i].composing_models_stat.size() !=
server_side_stat.composing_models_stat.size()) {
return cb::Error(
"Inconsistent ensemble setting detected between the trials.");
}
}

// Initialize the server stats for the merged report.
server_side_summary.inference_count = 0;
server_side_summary.execution_count = 0;
server_side_summary.cache_hit_count = 0;
server_side_summary.cache_miss_count = 0;
server_side_summary.success_count = 0;
server_side_summary.queue_count = 0;
server_side_summary.compute_input_count = 0;
server_side_summary.compute_output_count = 0;
server_side_summary.compute_infer_count = 0;
server_side_summary.cumm_time_ns = 0;
server_side_summary.queue_time_ns = 0;
server_side_summary.compute_input_time_ns = 0;
server_side_summary.compute_infer_time_ns = 0;
server_side_summary.compute_output_time_ns = 0;
server_side_summary.cache_hit_time_ns = 0;
server_side_summary.cache_miss_time_ns = 0;
server_side_summary.composing_models_stat.clear();
for (auto& composing_model_stat : server_side_stat.composing_models_stat) {
std::vector<ServerSideStats> composing_model_stats;
for (auto& server_side_stat : server_side_stats) {
composing_model_stats.push_back(
server_side_stat.composing_models_stat[composing_model_stat.first]);
}

ServerSideStats merged_composing_model_stats;
RETURN_IF_ERROR(MergeServerSideStats(
composing_model_stats, merged_composing_model_stats));
}

for (auto& server_side_stat : server_side_stats) {
// Aggregated Server Stats
server_side_summary.inference_count +=
server_side_stat.inference_count;
server_side_summary.execution_count +=
server_side_stat.execution_count;
server_side_summary.cache_hit_count +=
server_side_stat.cache_hit_count;
server_side_summary.cache_miss_count +=
server_side_stat.cache_miss_count;
server_side_summary.success_count += server_side_stat.success_count;
server_side_summary.queue_count += server_side_stat.queue_count;
server_side_summary.compute_input_count +=
server_side_stat.compute_input_count;
server_side_summary.compute_infer_count +=
server_side_stat.compute_infer_count;
server_side_summary.compute_output_count +=
server_side_stat.compute_output_count;
server_side_summary.cumm_time_ns += server_side_stat.cumm_time_ns;
server_side_summary.queue_time_ns += server_side_stat.queue_time_ns;
server_side_summary.compute_input_time_ns +=
server_side_stat.compute_input_time_ns;
server_side_summary.compute_infer_time_ns +=
server_side_stat.compute_infer_time_ns;
server_side_summary.compute_output_time_ns +=
server_side_stat.compute_output_time_ns;
server_side_summary.cache_hit_time_ns +=
server_side_stat.cache_hit_time_ns;
server_side_summary.cache_miss_time_ns +=
server_side_stat.cache_miss_time_ns;
}

return cb::Error::Success;
}

cb::Error
InferenceProfiler::MergePerfStatusReports(
std::deque<PerfStatus>& perf_status_reports, PerfStatus& summary_status)
Expand All @@ -730,6 +812,12 @@ InferenceProfiler::MergePerfStatusReports(
if (perf_status_reports[i].batch_size != perf_status.batch_size) {
return cb::Error("Incosistent batch size detected.");
}

if (perf_status_reports[i].server_stats.composing_models_stat.size() !=
perf_status.server_stats.composing_models_stat.size()) {
return cb::Error(
"Inconsistent ensemble setting detected between the trials.");
}
}

summary_status.batch_size = perf_status.batch_size;
Expand All @@ -752,25 +840,8 @@ InferenceProfiler::MergePerfStatusReports(
summary_status.client_stats.completed_count = 0;
summary_status.stabilizing_latency_ns = 0;

// Initialize the server stats for the merged report.
summary_status.server_stats.inference_count = 0;
summary_status.server_stats.execution_count = 0;
summary_status.server_stats.cache_hit_count = 0;
summary_status.server_stats.cache_miss_count = 0;
summary_status.server_stats.success_count = 0;
summary_status.server_stats.queue_count = 0;
summary_status.server_stats.compute_input_count = 0;
summary_status.server_stats.compute_output_count = 0;
summary_status.server_stats.compute_infer_count = 0;
summary_status.server_stats.cumm_time_ns = 0;
summary_status.server_stats.queue_time_ns = 0;
summary_status.server_stats.compute_input_time_ns = 0;
summary_status.server_stats.compute_infer_time_ns = 0;
summary_status.server_stats.compute_output_time_ns = 0;
summary_status.server_stats.cache_hit_time_ns = 0;
summary_status.server_stats.cache_miss_time_ns = 0;
summary_status.server_stats.composing_models_stat.clear();

std::vector<ServerSideStats> server_side_stats;
for (auto& perf_status : perf_status_reports) {
// Aggregated Client Stats
summary_status.client_stats.request_count +=
Expand All @@ -782,39 +853,7 @@ InferenceProfiler::MergePerfStatusReports(
summary_status.client_stats.duration_ns +=
perf_status.client_stats.duration_ns;

// Aggregated Server Stats
summary_status.server_stats.inference_count +=
perf_status.server_stats.inference_count;
summary_status.server_stats.execution_count +=
perf_status.server_stats.execution_count;
summary_status.server_stats.cache_hit_count +=
perf_status.server_stats.cache_hit_count;
summary_status.server_stats.cache_miss_count +=
perf_status.server_stats.cache_miss_count;
summary_status.server_stats.success_count +=
perf_status.server_stats.success_count;
summary_status.server_stats.queue_count +=
perf_status.server_stats.queue_count;
summary_status.server_stats.compute_input_count +=
perf_status.server_stats.compute_input_count;
summary_status.server_stats.compute_infer_count +=
perf_status.server_stats.compute_infer_count;
summary_status.server_stats.compute_output_count +=
perf_status.server_stats.compute_output_count;
summary_status.server_stats.cumm_time_ns +=
perf_status.server_stats.cumm_time_ns;
summary_status.server_stats.queue_time_ns +=
perf_status.server_stats.queue_time_ns;
summary_status.server_stats.compute_input_time_ns +=
perf_status.server_stats.compute_input_time_ns;
summary_status.server_stats.compute_infer_time_ns +=
perf_status.server_stats.compute_infer_time_ns;
summary_status.server_stats.compute_output_time_ns +=
perf_status.server_stats.compute_output_time_ns;
summary_status.server_stats.cache_hit_time_ns +=
perf_status.server_stats.cache_hit_time_ns;
summary_status.server_stats.cache_miss_time_ns +=
perf_status.server_stats.cache_miss_time_ns;
server_side_stats.push_back(perf_status.server_stats);

summary_status.client_stats.latencies.insert(
summary_status.client_stats.latencies.end(),
Expand Down Expand Up @@ -854,6 +893,10 @@ InferenceProfiler::MergePerfStatusReports(
summary_status.client_stats.completed_count;
}
}

RETURN_IF_ERROR(
MergeServerSideStats(server_side_stats, summary_status.server_stats));

std::sort(
summary_status.client_stats.latencies.begin(),
summary_status.client_stats.latencies.end());
Expand Down
4 changes: 4 additions & 0 deletions src/c++/perf_analyzer/inference_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ class InferenceProfiler {
cb::Error MergePerfStatusReports(
std::deque<PerfStatus>& perf_status, PerfStatus& summary_status);

cb::Error MergeServerSideStats(
std::vector<ServerSideStats>& server_side_stats,
ServerSideStats& server_side_summary);

/// Similar to above function, but instead of setting the concurrency, it
/// sets the specified request rate for measurements. \param request_rate
/// The request rate for inferences. \param summary Appends the
Expand Down

0 comments on commit 598cb3c

Please sign in to comment.