Skip to content

Commit

Permalink
Fix perf_analyzer report for ensemble models (triton-inference-server…
Browse files Browse the repository at this point in the history
  • Loading branch information
tanmayv25 authored Jan 26, 2021
1 parent 53a45b6 commit 68b5e89
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 33 deletions.
96 changes: 64 additions & 32 deletions src/clients/c++/perf_analyzer/inference_profiler.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -35,6 +35,48 @@
namespace perfanalyzer {

namespace {

// Converts a cumulative duration in nanoseconds into the average duration
// in microseconds over `cnt` occurrences.
// Returns 0 when `cnt` is 0 instead of dividing by zero (callers generally
// guard on a non-zero count, but this keeps the helper total).
inline uint64_t
AverageDurationInUs(const uint64_t total_time_in_ns, const uint64_t cnt)
{
  if (cnt == 0) {
    return 0;
  }
  // Dividing by cnt first, then by 1000, is mathematically identical to
  // dividing by (cnt * 1000) for non-negative integers (nested floor
  // division) and cannot overflow the uint64_t multiplication.
  return (total_time_in_ns / cnt) / 1000;
}

// Recursively accumulates, over all composing models of an ensemble, the
// per-request average queue and compute durations (in usec). Nested
// ensembles contribute the totals of their own composing models; leaf
// models with zero successful requests contribute nothing.
EnsembleDurations
GetTotalEnsembleDurations(const ServerSideStats& stats)
{
  EnsembleDurations totals;
  for (const auto& entry : stats.composing_models_stat) {
    const auto& model_stat = entry.second;
    if (!model_stat.composing_models_stat.empty()) {
      // This composing model is itself an ensemble: recurse and fold in
      // its aggregated durations.
      const auto nested = GetTotalEnsembleDurations(model_stat);
      totals.total_queue_time_us += nested.total_queue_time_us;
      totals.total_compute_time_us += nested.total_compute_time_us;
    } else {
      const uint64_t success_cnt = model_stat.success_count;
      if (success_cnt == 0) {
        continue;
      }
      totals.total_queue_time_us +=
          AverageDurationInUs(model_stat.queue_time_ns, success_cnt);
      totals.total_compute_time_us +=
          AverageDurationInUs(model_stat.compute_input_time_ns, success_cnt) +
          AverageDurationInUs(model_stat.compute_infer_time_ns, success_cnt) +
          AverageDurationInUs(model_stat.compute_output_time_ns, success_cnt);
    }
  }
  return totals;
}


// Returns the portion of `total_time` not accounted for by queue and
// compute time, clamped to zero when queue + compute exceed the total
// (which can happen because the components are averaged independently).
size_t
GetOverheadDuration(size_t total_time, size_t queue_time, size_t compute_time)
{
  const size_t accounted_time = queue_time + compute_time;
  if (total_time <= accounted_time) {
    return 0;
  }
  return total_time - accounted_time;
}

cb::Error
ReportServerSideStats(const ServerSideStats& stats, const int iteration)
{
Expand All @@ -49,50 +91,40 @@ ReportServerSideStats(const ServerSideStats& stats, const int iteration)
return cb::Error::Success;
}

const uint64_t cumm_time_us = stats.cumm_time_ns / 1000;
const uint64_t cumm_avg_us = cumm_time_us / cnt;

const uint64_t queue_time_us = stats.queue_time_ns / 1000;
const uint64_t queue_avg_us = queue_time_us / cnt;

const uint64_t compute_input_time_us = stats.compute_input_time_ns / 1000;
const uint64_t compute_input_avg_us = compute_input_time_us / cnt;

const uint64_t compute_infer_time_us = stats.compute_infer_time_ns / 1000;
const uint64_t compute_infer_avg_us = compute_infer_time_us / cnt;

const uint64_t compute_output_time_us = stats.compute_output_time_ns / 1000;
const uint64_t compute_output_avg_us = compute_output_time_us / cnt;
const uint64_t cumm_avg_us = AverageDurationInUs(stats.cumm_time_ns, cnt);

const uint64_t compute_avg_us =
compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
const uint64_t overhead = (cumm_avg_us > queue_avg_us + compute_avg_us)
? (cumm_avg_us - queue_avg_us - compute_avg_us)
: 0;
std::cout << ident << " Inference count: " << infer_cnt << std::endl
<< ident << " Execution count: " << exec_cnt << std::endl
<< ident << " Successful request count: " << exec_cnt << std::endl
<< ident << " Avg request latency: " << cumm_avg_us << " usec";
if (stats.composing_models_stat.empty()) {
std::cout << " (overhead " << overhead << " usec + "
const uint64_t queue_avg_us = AverageDurationInUs(stats.queue_time_ns, cnt);
const uint64_t compute_input_avg_us =
AverageDurationInUs(stats.compute_input_time_ns, cnt);
const uint64_t compute_infer_avg_us =
AverageDurationInUs(stats.compute_infer_time_ns, cnt);
const uint64_t compute_output_avg_us =
AverageDurationInUs(stats.compute_output_time_ns, cnt);
const uint64_t compute_avg_us =
compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
std::cout << " (overhead "
<< GetOverheadDuration(cumm_avg_us, queue_avg_us, compute_avg_us)
<< " usec + "
<< "queue " << queue_avg_us << " usec + "
<< "compute input " << compute_input_avg_us << " usec + "
<< "compute infer " << compute_infer_avg_us << " usec + "
<< "compute output " << compute_output_avg_us << " usec)"
<< std::endl
<< std::endl;
} else {
std::cout << std::endl;
std::cout << ident
<< " Total avg compute input time : " << compute_input_avg_us
<< " usec" << std::endl;
std::cout << ident
<< " Total avg compute infer time : " << compute_infer_avg_us
<< " usec" << std::endl;
std::cout << ident
<< " Total avg compute output time : " << compute_output_avg_us
<< " usec" << std::endl;
std::cout << ident << " Total avg queue time : " << queue_avg_us << " usec"
const auto ensemble_times = GetTotalEnsembleDurations(stats);
std::cout << " (overhead "
<< GetOverheadDuration(
cumm_avg_us, ensemble_times.total_queue_time_us,
ensemble_times.total_compute_time_us)
<< " usec + "
<< "queue " << ensemble_times.total_queue_time_us << " usec + "
<< "compute " << ensemble_times.total_compute_time_us << " usec)"
<< std::endl
<< std::endl;

Expand Down
10 changes: 9 additions & 1 deletion src/clients/c++/perf_analyzer/inference_profiler.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -55,6 +55,14 @@ struct LoadStatus {
uint64_t avg_latency = 0;
};

// Holds the total of the timing components of the composing models of an
// ensemble.
struct EnsembleDurations {
  // Sum of the per-request average queue durations (usec) across all
  // composing models.
  uint64_t total_queue_time_us = 0;
  // Sum of the per-request average compute (input + infer + output)
  // durations (usec) across all composing models.
  uint64_t total_compute_time_us = 0;
};

/// Holds the server-side inference statistics of the target model and its
/// composing models
struct ServerSideStats {
Expand Down

0 comments on commit 68b5e89

Please sign in to comment.